View source: R/modelselection.R
modelsearch  R Documentation 
Function modelsearch()
runs exhaustive model building and selection
for each vital rate needed to estimate a functionbased MPM or IPM. It
returns bestfit models for each vital rate, model table showing all models
tested, and model quality control data. The final output can be used as input
in other functions within this package.
modelsearch(
data,
stageframe = NULL,
historical = TRUE,
approach = "mixed",
suite = "size",
bestfit = "AICc&k",
vitalrates = c("surv", "size", "fec"),
surv = c("alive3", "alive2", "alive1"),
obs = c("obsstatus3", "obsstatus2", "obsstatus1"),
size = c("sizea3", "sizea2", "sizea1"),
sizeb = c(NA, NA, NA),
sizec = c(NA, NA, NA),
repst = c("repstatus3", "repstatus2", "repstatus1"),
fec = c("feca3", "feca2", "feca1"),
stage = c("stage3", "stage2", "stage1"),
matstat = c("matstatus3", "matstatus2", "matstatus1"),
indiv = "individ",
patch = NA,
year = "year2",
density = NA,
test.density = FALSE,
sizedist = "gaussian",
sizebdist = NA,
sizecdist = NA,
fecdist = "gaussian",
size.zero = FALSE,
sizeb.zero = FALSE,
sizec.zero = FALSE,
size.trunc = FALSE,
sizeb.trunc = FALSE,
sizec.trunc = FALSE,
fec.zero = FALSE,
fec.trunc = FALSE,
patch.as.random = TRUE,
year.as.random = TRUE,
juvestimate = NA,
juvsize = FALSE,
jsize.zero = FALSE,
jsizeb.zero = FALSE,
jsizec.zero = FALSE,
jsize.trunc = FALSE,
jsizeb.trunc = FALSE,
jsizec.trunc = FALSE,
fectime = 2,
censor = NA,
age = NA,
test.age = FALSE,
indcova = NA,
indcovb = NA,
indcovc = NA,
random.indcova = FALSE,
random.indcovb = FALSE,
random.indcovc = FALSE,
test.indcova = FALSE,
test.indcovb = FALSE,
test.indcovc = FALSE,
test.group = FALSE,
show.model.tables = TRUE,
global.only = FALSE,
accuracy = TRUE,
data_out = FALSE,
quiet = FALSE
)
data 
The vertical dataset to be used for analysis. This dataset should
be of class 
stageframe 
The stageframe characterizing the life history model used.
Optional unless 
historical 
A logical variable denoting whether to assess the effects
of state in occasion t1, in addition to state in occasion t.
Defaults to 
approach 
The statistical approach to be taken for model building. The
default is 
suite 
Either a single string value or a vector of 14 strings for each
vital rate model. Describes the global model for each vital rate estimation,
and has the following possible values: 
bestfit 
A variable indicating the model selection criterion for the
choice of bestfit model. The default is 
vitalrates 
A vector describing which vital rates will be estimated via
linear modeling, with the following options: 
surv 
A vector indicating the variable names coding for status as alive
or dead in occasions t+1, t, and t1, respectively.
Defaults to 
obs 
A vector indicating the variable names coding for observation
status in occasions t+1, t, and t1, respectively.
Defaults to 
size 
A vector indicating the variable names coding for the primary
size variable on occasions t+1, t, and t1,
respectively. Defaults to 
sizeb 
A vector indicating the variable names coding for the secondary
size variable on occasions t+1, t, and t1,
respectively. Defaults to 
sizec 
A vector indicating the variable names coding for the tertiary
size variable on occasions t+1, t, and t1,
respectively. Defaults to 
repst 
A vector indicating the variable names coding for reproductive
status in occasions t+1, t, and t1, respectively.
Defaults to 
fec 
A vector indicating the variable names coding for fecundity in
occasions t+1, t, and t1, respectively. Defaults to

stage 
A vector indicating the variable names coding for stage in
occasions t+1, t, and t1. Defaults to

matstat 
A vector indicating the variable names coding for maturity
status in occasions t+1, t, and t1. Defaults to

indiv 
A text value indicating the variable name coding individual
identity. Defaults to 
patch 
A text value indicating the variable name coding for patch,
where patches are defined as permanent subgroups within the study population.
Defaults to 
year 
A text value indicating the variable coding for observation
occasion t. Defaults to 
density 
A text value indicating the name of the variable coding for
spatial density, should the user wish to test spatial density as a fixed
factor affecting vital rates. Defaults to 
test.density 
Either a logical value indicating whether to include

sizedist 
The probability distribution used to model primary size.
Options include 
sizebdist 
The probability distribution used to model secondary size.
Options include 
sizecdist 
The probability distribution used to model tertiary size.
Options include 
fecdist 
The probability distribution used to model fecundity. Options
include 
size.zero 
A logical variable indicating whether the primary size
distribution should be zeroinflated. Only applies to Poisson and negative
binomial distributions. Defaults to 
sizeb.zero 
A logical variable indicating whether the secondary size
distribution should be zeroinflated. Only applies to Poisson and negative
binomial distributions. Defaults to 
sizec.zero 
A logical variable indicating whether the tertiary size
distribution should be zeroinflated. Only applies to Poisson and negative
binomial distributions. Defaults to 
size.trunc 
A logical variable indicating whether the primary size
distribution should be zerotruncated. Only applies to Poisson and negative
binomial distributions. Defaults to 
sizeb.trunc 
A logical variable indicating whether the secondary size
distribution should be zerotruncated. Only applies to Poisson and negative
binomial distributions. Defaults to 
sizec.trunc 
A logical variable indicating whether the tertiary size
distribution should be zerotruncated. Only applies to Poisson and negative
binomial distributions. Defaults to 
fec.zero 
A logical variable indicating whether the fecundity
distribution should be zeroinflated. Only applies to Poisson and negative
binomial distributions. Defaults to 
fec.trunc 
A logical variable indicating whether the fecundity
distribution should be zerotruncated. Only applies to the Poisson and
negative binomial distributions. Defaults to 
patch.as.random 
If set to 
year.as.random 
If set to 
juvestimate 
An optional variable denoting the stage name of the
juvenile stage in the vertical dataset. If not 
juvsize 
A logical variable denoting whether size should be used as a
term in models involving transition from the juvenile stage. Defaults to

jsize.zero 
A logical variable indicating whether the primary size
distribution of juveniles should be zeroinflated. Only applies to Poisson
and negative binomial distributions. Defaults to 
jsizeb.zero 
A logical variable indicating whether the secondary size
distribution of juveniles should be zeroinflated. Only applies to Poisson
and negative binomial distributions. Defaults to 
jsizec.zero 
A logical variable indicating whether the tertiary size
distribution of juveniles should be zeroinflated. Only applies to Poisson
and negative binomial distributions. Defaults to 
jsize.trunc 
A logical variable indicating whether the primary size
distribution in juveniles should be zerotruncated. Defaults to 
jsizeb.trunc 
A logical variable indicating whether the secondary size
distribution in juveniles should be zerotruncated. Defaults to 
jsizec.trunc 
A logical variable indicating whether the tertiary size
distribution in juveniles should be zerotruncated. Defaults to 
fectime 
A variable indicating which year of fecundity to use as the
response term in fecundity models. Options include 
censor 
A vector denoting the names of censoring variables in the
dataset, in order from occasion t+1, followed by occasion t,
and lastly followed by occasion t1. Defaults to 
age 
Designates the name of the variable corresponding to age in time
t in the vertical dataset. Defaults to 
test.age 
Either a logical value indicating whether to include

indcova 
Vector designating the names in occasions t+1,
t, and t1 of an individual covariate. Defaults to 
indcovb 
Vector designating the names in occasions t+1,
t, and t1 of a second individual covariate. Defaults to

indcovc 
Vector designating the names in occasions t+1,
t, and t1 of a third individual covariate. Defaults to

random.indcova 
A logical value indicating whether 
random.indcovb 
A logical value indicating whether 
random.indcovc 
A logical value indicating whether 
test.indcova 
Either a logical value indicating whether to include the

test.indcovb 
Either a logical value indicating whether to include the

test.indcovc 
Either a logical value indicating whether to include the

test.group 
Either a logical value indicating whether to include the

show.model.tables 
If set to TRUE, then includes full modeling tables
in the output. Defaults to 
global.only 
If set to TRUE, then only global models will be built and
evaluated. Defaults to 
accuracy 
A logical value indicating whether to test accuracy of
models. See 
data_out 
A logical value indicating whether to append all subsetted
datasets used in model building and selection to the output. Defaults to

quiet 
May be a logical value, or any one of the strings 
This function yields an object of class lefkoMod
, which is a
list in which the first 14 elements are the bestfit models for survival,
observation status, primary size, secondary size, tertiary size,
reproductive status, fecundity, juvenile survival, juvenile observation,
juvenile primary size, juvenile secondary size, juvenile tertiary size,
juvenile transition to reproduction, and juvenile transition to maturity,
respectively. This is followed by 14 elements corresponding to the model
tables for each of these vital rates, in order, followed by a data frame
showing the order and names of variables used in modeling, followed by a
single character element denoting the criterion used for model selection, and
ending on a data frame with quality control data:
survival_model 
Bestfit model of the binomial probability of survival
from occasion t to occasion t+1. Defaults to 
observation_model 
Bestfit model of the binomial probability of
observation in occasion t+1 given survival to that occasion. Defaults
to 
size_model 
Bestfit model of the primary size metric on occasion
t+1 given survival to and observation in that occasion. Defaults to

sizeb_model 
Bestfit model of the secondary size metric on occasion
t+1 given survival to and observation in that occasion. Defaults to

sizec_model 
Bestfit model of the tertiary size metric on occasion
t+1 given survival to and observation in that occasion. Defaults to

repstatus_model 
Bestfit model of the binomial probability of
reproduction in occasion t+1, given survival to and observation in
that occasion. Defaults to 
fecundity_model 
Bestfit model of fecundity in occasion t+1
given survival to, and observation and reproduction in that occasion.
Defaults to 
juv_survival_model 
Bestfit model of the binomial probability of
survival from occasion t to occasion t+1 of an immature
individual. Defaults to 
juv_observation_model 
Bestfit model of the binomial probability of
observation in occasion t+1 given survival to that occasion of an
immature individual. Defaults to 
juv_size_model 
Bestfit model of the primary size metric on occasion
t+1 given survival to and observation in that occasion of an immature
individual. Defaults to 
juv_sizeb_model 
Bestfit model of the secondary size metric on
occasion t+1 given survival to and observation in that occasion of an
immature individual. Defaults to 
juv_sizec_model 
Bestfit model of the tertiary size metric on occasion
t+1 given survival to and observation in that occasion of an immature
individual. Defaults to 
juv_reproduction_model 
Bestfit model of the binomial probability of
reproduction in occasion t+1, given survival to and observation in
that occasion of an individual that was immature in occasion t. This
model is technically not a model of reproduction probability for individuals
that are immature, rather reproduction probability here is given for
individuals that are mature in occasion t+1 but immature in occasion
t. Defaults to 
juv_maturity_model 
Bestfit model of the binomial probability of
becoming mature in occasion t+1, given survival to that occasion of an
individual that was immature in occasion t. Defaults to 
survival_table 
Full dredge model table of survival probability. 
observation_table 
Full dredge model table of observation probability. 
size_table 
Full dredge model table of the primary size variable. 
sizeb_table 
Full dredge model table of the secondary size variable. 
sizec_table 
Full dredge model table of the tertiary size variable. 
repstatus_table 
Full dredge model table of reproduction probability. 
fecundity_table 
Full dredge model table of fecundity. 
juv_survival_table 
Full dredge model table of immature survival probability. 
juv_observation_table 
Full dredge model table of immature observation probability. 
juv_size_table 
Full dredge model table of primary size in immature individuals. 
juv_sizeb_table 
Full dredge model table of secondary size in immature individuals. 
juv_sizec_table 
Full dredge model table of tertiary size in immature individuals. 
juv_reproduction_table 
Full dredge model table of immature reproduction probability. 
juv_maturity_table 
Full dredge model table of the probability of an immature individual transitioning to maturity. 
paramnames 
A data frame showing the names of variables from the input data frame used in modeling, their associated standardized names in linear models, and a brief comment describing each variable. 
criterion 
Character variable denoting the criterion used to determine the bestfit model. 
qc 
Data frame with five variables: 1) Name of vital rate, 2) number of individuals used to model that vital rate, 3) number of individual transitions used to model that vital rate, 4) parameter distribution used to model the vital rats, and 5) accuracy of model, given as detailed in Notes section. 
subdata 
An optional list of data frames, each of which is the data
frame used to develop each model in the 
When modelsearch()
is called, it first trims the dataset down to just
the variables that will be used (including all response terms and independent
variables). It then subsets the data to only complete cases for those
variables. Next, it builds global models for all vital rates, and runs them.
If a global model fails, then the function proceeds by dropping terms. If
approach = "mixed"
, then it will determine which random factor term
contains the most categories in the respective subset, and drop that term.
If this fails, or if approach = "glm"
, then it will drop any twoway
interactions and run the model. If this fails, then the function will attempt
to drop further terms, first patch alone, then year alone, then individual
covariates by themselves, then combinations of these four, and finally
individual identity. If all of these attempts fail and the approach used is
mixed
, then the function will try running a glm version of the
original failed model. Finally, if all attempts fail, then the function
displays a warning and returns 1
to allow model building assuming a
constant rate or probability.
Setting suite = "cons"
prevents the inclusion of size and reproductive
status as fixed, independent factors in modeling. However, it does not
prevent any other terms from being included. Density, age, individual
covariates, individual identity, patch, and year may all be included.
The mechanics governing model building are fairly robust to errors and
exceptions. The function attempts to build global models, and simplifies
models automatically should model building fail. Model building proceeds
through the functions lm()
(GLM with Gaussian response),
glm()
(GLM with Poisson, Gamma, or binomial response),
glm.nb()
(GLM with negative binomial response),
zeroinfl()
(GLM with zeroinflated Poisson or negative
binomial response), vglm()
(GLM with zerotruncated
Poisson or negative binomial response), lmer()
(mixed
model with Gaussian response), glmer()
(mixed model with
binomial, Poisson, or Gamma response), and glmmTMB()
(mixed model with negative binomial, or zerotruncated or zeroinflated
Poisson or negative binomial response). See documentation related to these
functions for further information. Any response term that is invariable in
the dataset will lead to a bestfit model for that response represented by a
single constant value.
Exhaustive model building and selection proceeds via the
dredge()
function in package MuMIn
. This function
is verbose, so that any errors and warnings developed during model building,
model analysis, and model selection can be found and dealt with.
Interpretations of errors during global model analysis may be found in
documentation for the functions and packages mentioned. Package MuMIn
is used for model dredging (see dredge()), and errors and
warnings during dredging can be interpreted using the documentation for that
package. Errors occurring during dredging lead to the adoption of the global
model as the bestfit, and the user should view all logged errors and
warnings to determine the best way to proceed. The quiet = TRUE
and
quiet = "partial"
options can be used to silence dredge warnings, but
users should note that automated model selection can be viewed as a black
box, and so care should be taken to ensure that the models run make
biological sense, and that model quality is prioritized.
Exhaustive model selection through dredging works best with larger datasets
and fewer tested parameters. Setting suite = "full"
may initiate a
dredge that takes a dramatically long time, particularly if the model is
historical, individual covariates are used, or a zeroinflated distribution
is assumed. In such cases, the number of models built and tested will run at
least in the millions. Small datasets will also increase the error associated
with these tests, leading to adoption of simpler models overall. Note also
that zeroinflated models are processed as two models, and so include twice
the assumed number of parameters. If suite = "full"
, then this
function will switch to a main effects global model for the zeroinflated
parameter models if the total number of parameters to test rises above the
limits imposed by the dredge()
function in package
MuMIn
.
Accuracy of vital rate models is calculated differently depending on vital
rate and assumed distribution. For all vital rates assuming a binomial
distribution, including survival, observation status, reproductive status,
and juvenile version of these, accuracy is calculated as the percent of
predicted responses equal to actual responses. In all other models, accuracy
is actually assessed as a simple Rsquared in which the observed response
values per data subset are compared to the predicted response values
according to each bestfit model. Note that some situations in which factor
variables are used may result in failure to assess accuracy. In these cases,
function modelsearch()
simply yields NA
values.
Care must be taken to build models that test the impacts of state in occasion
t1 for historical models, and that do not test these impacts for
ahistorical models. Ahistorical matrix modeling particularly will yield
biased transition estimates if historical terms from models are ignored. This
can be dealt with at the start of modeling by setting
historical = FALSE
for the ahistorical case, and
historical = TRUE
for the historical case.
This function handles generalized linear models (GLMs) under zeroinflated
distributions using the zeroinfl()
function, and zero
truncated distributions using the vglm()
function. Model
dredging may fail with these functions, leading to the global model being
accepted as the bestfit model. However, model dredges of mixed models work
for all distributions. We encourage the use of mixed models in all cases.
The negative binomial and truncated negative binomial distributions use the quadratic structure emphasized in Hardin and Hilbe (2018, 4th Edition of Generalized Linear Models and Extensions). The truncated negative binomial distribution may fail to predict size probabilities correctly when dispersion is near that expected of the Poisson distribution. To prevent this problem, we have integrated a cap on the overdispersion parameter. However, when using this distribution, please check the matrix column sums to make sure that they do not predict survival greater than 1.0. If they do, then please use either the negative binomial distribution or the zerotruncated Poisson distribution.
If density dependence is explored through function modelsearch()
,
then the interpretation of density is not the full population size but rather
the spatial density term included in the dataset.
Users building vital rate models for Leslie matrices must set
vitalrates = c("surv", "fec")
or vitalrates = "leslie"
rather
than the default, because only survival and fecundity should be estimated in
these cases. Also, the suite
setting can be set to either age
or cons
, as the results will be exactly the same.
Users wishing to test age, density, group, or individual covariates, must
include test.age = TRUE
, test.density = TRUE
,
test.group = TRUE
, or test.indcova = TRUE
(or
test.indcovb = TRUE
or test.indcovc = TRUE
, whichever is most
appropriate), respectively, in addition to stipulating the name of the
variable within the dataset. The default for these options is always
FALSE
.
data(lathyrus)
sizevector < c(0, 4.6, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8,
9)
stagevector < c("Sd", "Sdl", "Dorm", "Sz1nr", "Sz2nr", "Sz3nr", "Sz4nr",
"Sz5nr", "Sz6nr", "Sz7nr", "Sz8nr", "Sz9nr", "Sz1r", "Sz2r", "Sz3r",
"Sz4r", "Sz5r", "Sz6r", "Sz7r", "Sz8r", "Sz9r")
repvector < c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1)
obsvector < c(0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
matvector < c(0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
immvector < c(1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
propvector < c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0)
indataset < c(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
binvec < c(0, 4.6, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5)
lathframeln < sf_create(sizes = sizevector, stagenames = stagevector,
repstatus = repvector, obsstatus = obsvector, matstatus = matvector,
immstatus = immvector, indataset = indataset, binhalfwidth = binvec,
propstatus = propvector)
lathvertln < verticalize3(lathyrus, noyears = 4, firstyear = 1988,
patchidcol = "SUBPLOT", individcol = "GENET", blocksize = 9,
juvcol = "Seedling1988", sizeacol = "lnVol88", repstracol = "Intactseed88",
fecacol = "Intactseed88", deadacol = "Dead1988",
nonobsacol = "Dormant1988", stageassign = lathframeln, stagesize = "sizea",
censorcol = "Missing1988", censorkeep = NA, NAas0 = TRUE, censor = TRUE)
lathvertln$feca2 < round(lathvertln$feca2)
lathvertln$feca1 < round(lathvertln$feca1)
lathvertln$feca3 < round(lathvertln$feca3)
lathmodelsln3 < modelsearch(lathvertln, historical = TRUE,
approach = "mixed", suite = "main",
vitalrates = c("surv", "obs", "size", "repst", "fec"), juvestimate = "Sdl",
bestfit = "AICc&k", sizedist = "gaussian", fecdist = "poisson",
indiv = "individ", patch = "patchid", year = "year2",year.as.random = TRUE,
patch.as.random = TRUE, show.model.tables = TRUE, quiet = "partial")
# Here we use supplemental() to provide overwrite and reproductive info
lathsupp3 < supplemental(stage3 = c("Sd", "Sd", "Sdl", "Sdl", "mat", "Sd", "Sdl"),
stage2 = c("Sd", "Sd", "Sd", "Sd", "Sdl", "rep", "rep"),
stage1 = c("Sd", "rep", "Sd", "rep", "Sd", "mat", "mat"),
eststage3 = c(NA, NA, NA, NA, "mat", NA, NA),
eststage2 = c(NA, NA, NA, NA, "Sdl", NA, NA),
eststage1 = c(NA, NA, NA, NA, "Sdl", NA, NA),
givenrate = c(0.345, 0.345, 0.054, 0.054, NA, NA, NA),
multiplier = c(NA, NA, NA, NA, NA, 0.345, 0.054),
type = c(1, 1, 1, 1, 1, 3, 3), type_t12 = c(1, 2, 1, 2, 1, 1, 1),
stageframe = lathframeln, historical = TRUE)
lathmat3ln < flefko3(year = "all", patch = "all", stageframe = lathframeln,
modelsuite = lathmodelsln3, data = lathvertln, supplement = lathsupp3,
reduce = FALSE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.