# Create Model Selection Tables

### Description

This function creates a model selection table based on one of the
following information criteria: AIC, AICc, QAIC, QAICc. The table
ranks the models based on the selected information criteria and also
provides delta AIC and Akaike weights. `aictab`

selects the
appropriate function to create the model selection table based on the
object class. The current version works with lists containing objects
of `aov`

, `betareg`

, `clm`

, `clmm`

, `clogit`

,
`coxme`

, `coxph`

, `fitdist`

, `fitdistr`

, `glm`

,
`gls`

, `gnls`

, `hurdle`

, `lavaan`

, `lm`

,
`lme`

, `lmekin`

, `maxlikeFit`

, `mer`

, `merMod`

,
`multinom`

, `nlme`

, `nls`

, `polr`

, `rlm`

,
`survreg`

, `vglm`

, and `zeroinfl`

classes as well as
various models of `unmarkedFit`

classes but does not yet allow
mixing of different classes.

### Usage

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 | ```
aictab(cand.set, modnames = NULL, second.ord = TRUE, nobs = NULL,
sort = TRUE, ...)
## S3 method for class 'AICaov.lm'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AICbetareg'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AICsclm.clm'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AICclmm'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AICclm'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AICcoxme'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AICcoxph'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AICfitdist'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AICfitdistr'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AICglm.lm'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
## S3 method for class 'AICgls'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AICgnls.gls'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AIChurdle'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AIClavaan'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AIClm'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AIClme'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AIClmekin'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AICmaxlikeFit.list'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
## S3 method for class 'AICmer'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AIClmerMod'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AICglmerMod'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AICnlmerMod'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AICmultinom.nnet'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
## S3 method for class 'AICnlme.lme'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AICnls'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AICpolr'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AICrlm.lm'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AICsurvreg'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
## S3 method for class 'AICunmarkedFitOccu'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
## S3 method for class 'AICunmarkedFitColExt'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
## S3 method for class 'AICunmarkedFitOccuRN'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
## S3 method for class 'AICunmarkedFitPCount'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
## S3 method for class 'AICunmarkedFitPCO'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
## S3 method for class 'AICunmarkedFitDS'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
## S3 method for class 'AICunmarkedFitGDS'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
## S3 method for class 'AICunmarkedFitOccuFP'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
## S3 method for class 'AICunmarkedFitMPois'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
## S3 method for class 'AICunmarkedFitGMM'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
## S3 method for class 'AICunmarkedFitGPC'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
## S3 method for class 'AICvglm'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, c.hat = 1, ...)
## S3 method for class 'AICzeroinfl'
aictab(cand.set, modnames = NULL,
second.ord = TRUE, nobs = NULL, sort = TRUE, ...)
``` |

### Arguments

`cand.set` |
a list storing each of the models in the candidate model set. |

`modnames` |
a character vector of model names to facilitate the identification of
each model in the model selection table. If |

`second.ord` |
logical. If |

`nobs` |
this argument allows to specify a numeric value other than total sample
size to compute the AICc (i.e., |

`sort` |
logical. If |

`c.hat` |
value of overdispersion parameter (i.e., variance inflation factor) such
as that obtained from |

`...` |
additional arguments passed to the function. |

### Details

`aictab`

internally creates a new class for the `cand.set`

list of candidate models, according to the contents of the list. The
current function is implemented for `clogit`

, `coxme`

,
`coxph`

, `fitdist`

, `fitdistr`

, `glm`

, `gls`

,
`gnls`

, `hurdle`

, `lavaan`

, `lm`

, `lme`

,
`lmekin`

, `maxlikeFit`

, `mer`

, `merMod`

,
`multinom`

, `nlme`

, `nls`

, `polr`

, `rlm`

,
`survreg`

, `vglm`

, and `zeroinfl`

classes as well as
various `unmarkedFit`

classes. The function constructs a model
selection table based on one of the four information criteria: AIC,
AICc, QAIC, and QAICc.

Ten guidelines for model selection:

1) Carefully construct your candidate model set. Each model should represent a specific (interesting) hypothesis to test.

2) Keep your candidate model set short. It is ill-advised to consider as many models as there are data.

3) Check model fit. Use your global model (most complex model) or subglobal models to determine if the assumptions are valid. If none of your models fit the data well, information criteria will only indicate the most parsimonious of the poor models.

4) Avoid data dredging (i.e., looking for patterns after an initial round of analysis).

5) Avoid overfitting models. You should not estimate too many parameters for the number of observations available in the sample.

6) Be careful of missing values. Remember that values that are missing only for certain variables change the data set and sample size, depending on which variable is included in any given model. I suggest to remove missing cases before starting model selection.

7) Use the same response variable for all models of the candidate model set. It is inappropriate to run some models with a transformed response variable and others with the untransformed variable. A workaround is to use a different link function for some models (e.g., identity vs log link).

8) When dealing with models with overdispersion, use the same value of c-hat for all models in the candidate model set. For binomial models with trials > 1 (i.e., success/trial or cbind(success, failure) syntax) or with Poisson GLM's, you should estimate the c-hat from the most complex model (global model). If c-hat > 1, you should use the same value for each model of the candidate model set (where appropriate) and include it in the count of parameters (K). Similarly, for negative binomial models, you should estimate the dispersion parameter from the global model and use the same value across all models.

9) Burnham and Anderson (2002) recommend to avoid mixing the information-theoretic approach and notions of significance (i.e., P values). It is best to provide estimates and a measure of their precision (standard error, confidence intervals).

10) Determining the ranking of the models is just the first step. Akaike weights sum to 1 for the entire model set and can be interpreted as the weight of evidence in favor of a given model being the best one given the candidate model set considered and the data at hand. Models with large Akaike weights have strong support. Evidence ratios, importance values, and confidence sets for the best model are all measures that assist in interpretation. In cases where the top ranking model has an Akaike weight > 0.9, one can base inference on this single most parsimonious model. When many models rank highly (i.e., delta (Q)AIC(c) < 4), one should model-average effect sizes for the parameters with most support across the entire set of models. Model averaging consists in making inference based on the whole set of candidate models, instead of basing conclusions on a single 'best' model. It is an elegant way of making inference based on the information contained in the entire model set.

### Value

`aictab`

creates an object of class `aictab`

with the
following components:

`Modname` |
the names of each model of the candidate model set. |

`K ` |
the number of estimated parameters for each model. |

`(Q)AIC(c) ` |
the information criterion requested for each model (AIC, AICc, QAIC, QAICc). |

`Delta_(Q)AIC(c) ` |
the appropriate delta AIC component depending on the information criteria selected. |

`ModelLik ` |
the relative likelihood of the model given the data (exp(-0.5*delta[i])). This is not to be confused with the likelihood of the parameters given the data. The relative likelihood can then be normalized across all models to get the model probabilities. |

`(Q)AIC(c)Wt` |
the Akaike weights, also termed "model probabilities" sensu Burnham and Anderson (2002) and Anderson (2008). These measures indicate the level of support (i.e., weight of evidence) in favor of any given model being the most parsimonious among the candidate model set. |

`Cum.Wt ` |
the cumulative Akaike weights. These are only meaningful
if results in table are sorted in decreasing order of Akaike weights
(i.e., |

`c.hat` |
if c.hat was specified as an argument, it is included in the table. |

`LL` |
if c.hat = 1 and parameters estimated by maximum likelihood, the log-likelihood of each model. |

`Quasi.LL` |
if c.hat > 1, the quasi log-likelihood of each model. |

`Res.LL` |
if parameters are estimated by restricted maximum-likelihood (REML), the restricted log-likelihood of each model. |

### Author(s)

Marc J. Mazerolle

### References

Anderson, D. R. (2008) *Model-based Inference in the Life Sciences:
a primer on evidence*. Springer: New York.

Burnham, K. P., Anderson, D. R. (2002) *Model Selection and
Multimodel Inference: a practical information-theoretic
approach*. Second edition. Springer: New York.

Burnham, K. P., Anderson, D. R. (2004) Multimodel inference:
understanding AIC and BIC in model selection. *Sociological Methods
and Research* **33**, 261–304.

Dail, D., Madsen, L. (2011) Models for estimating abundance from
repeated counts of an open population. *Biometrics* **67**,
577–587.

MacKenzie, D. I., Nichols, J. D., Lachman, G. B., Droege, S., Royle,
J. A., Langtimm, C. A. (2002) Estimating site occupancy rates when
detection probabilities are less than one. *Ecology* **83**,
2248–2255.

MacKenzie, D. I., Nichols, J. D., Hines, J. E., Knutson, M. G.,
Franklin, A. B. (2003) Estimating site occupancy, colonization, and
local extinction when a species is detected imperfectly. *Ecology*
**84**, 2200–2207.

Mazerolle, M. J. (2006) Improving data analysis in herpetology: using
Akaike's Information Criterion (AIC) to assess the strength of
biological hypotheses. *Amphibia-Reptilia* **27**, 169–180.

Royle, J. A. (2004) *N*-mixture models for estimating population
size from spatially replicated counts. *Biometrics* **60**,
108–115.

### See Also

`AICc`

, `aictabCustom`

, `bictab`

,
`confset`

, `c_hat`

, `evidence`

,
`importance`

, `modavg`

,
`modavgEffect`

, `modavgShrink`

,
`modavgPred`

### Examples

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 | ```
##Mazerolle (2006) frog water loss example
data(dry.frog)
##setup a subset of models of Table 1
Cand.models <- list( )
Cand.models[[1]] <- lm(log_Mass_lost ~ Shade + Substrate +
cent_Initial_mass + Initial_mass2,
data = dry.frog)
Cand.models[[2]] <- lm(log_Mass_lost ~ Shade + Substrate +
cent_Initial_mass + Initial_mass2 +
Shade:Substrate, data = dry.frog)
Cand.models[[3]] <- lm(log_Mass_lost ~ cent_Initial_mass +
Initial_mass2, data = dry.frog)
Cand.models[[4]] <- lm(log_Mass_lost ~ Shade + cent_Initial_mass +
Initial_mass2, data = dry.frog)
Cand.models[[5]] <- lm(log_Mass_lost ~ Substrate + cent_Initial_mass +
Initial_mass2, data = dry.frog)
##create a vector of names to trace back models in set
Modnames <- paste("mod", 1:length(Cand.models), sep = " ")
##generate AICc table
aictab(cand.set = Cand.models, modnames = Modnames, sort = TRUE)
##round to 4 digits after decimal point and give log-likelihood
print(aictab(cand.set = Cand.models, modnames = Modnames, sort = TRUE),
digits = 4, LL = TRUE)
## Not run:
##Burnham and Anderson (2002) flour beetle data
data(beetle)
##models as suggested by Burnham and Anderson p. 198
Cand.set <- list( )
Cand.set[[1]] <- glm(Mortality_rate ~ Dose, family =
binomial(link = "logit"), weights = Number_tested,
data = beetle)
Cand.set[[2]] <- glm(Mortality_rate ~ Dose, family =
binomial(link = "probit"), weights = Number_tested,
data = beetle)
Cand.set[[3]] <- glm(Mortality_rate ~ Dose, family =
binomial(link ="cloglog"), weights = Number_tested,
data = beetle)
##check c-hat
c_hat(Cand.set[[1]])
c_hat(Cand.set[[2]])
c_hat(Cand.set[[3]])
##lowest value of c-hat < 1 for these non-nested models, thus use
##c.hat = 1
##set up named list
names(Cand.set) <- c("logit", "probit", "cloglog")
##compare models
##model names will be taken from the list if modnames is not specified
res.table <- aictab(cand.set = Cand.set, second.ord = FALSE)
##note that delta AIC and Akaike weights are identical to Table 4.7
print(res.table, digits = 2, LL = TRUE) #print table with 2 digits and
##print log-likelihood in table
print(res.table, digits = 4, LL = FALSE) #print table with 4 digits and
##do not print log-likelihood
## End(Not run)
##two-way ANOVA with interaction
data(iron)
##full model
m1 <- lm(Iron ~ Pot + Food + Pot:Food, data = iron)
##additive model
m2 <- lm(Iron ~ Pot + Food, data = iron)
##null model
m3 <- lm(Iron ~ 1, data = iron)
##candidate models
Cand.aov <- list(m1, m2, m3)
Cand.names <- c("full", "additive", "null")
aictab(Cand.aov, Cand.names)
##single-season occupancy model example modified from ?occu
## Not run:
require(unmarked)
##single season example modified from ?occu
data(frogs)
pferUMF <- unmarkedFrameOccu(pfer.bin)
##add fake covariates
siteCovs(pferUMF) <- data.frame(sitevar1 = rnorm(numSites(pferUMF)),
sitevar2 = runif(numSites(pferUMF)))
##observation covariates
obsCovs(pferUMF) <- data.frame(obsvar1 = rnorm(numSites(pferUMF) *
obsNum(pferUMF)))
##set up candidate model set
fm1 <- occu(~ obsvar1 ~ sitevar1, pferUMF)
fm2 <- occu(~ 1 ~ sitevar1, pferUMF)
fm3 <- occu(~ obsvar1 ~ sitevar2, pferUMF)
fm4 <- occu(~ 1 ~ sitevar2, pferUMF)
##assemble models in named list (alternative to using 'modnames' argument)
Cand.mods <- list("fm1" = fm1, "fm2" = fm2, "fm3" = fm3, "fm4" = fm4)
##compute table
aictab(cand.set = Cand.mods, second.ord = TRUE)
detach(package:unmarked)
## End(Not run)
``` |