knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
This script examines different error structures (Poisson, negative binomial, etc.) and uses information criteria (AIC and BIC) to determine which are most promising.
Once the overall error structure has been assessed, the fixed effects and random effect can be assessed; these tasks are each done in a separate script.
library(avesdemazan) # Model fitting library(lme4) # glmer() for glmm library(glmmTMB) # glmm with AR-1 # library(blme) # bglmer (used?) # Model evaluation # library(afex) # functionall_fit() # library(multcomp) # library(emmeans) library(bbmle) # aictab library(arm) # se.fit() # Result visualization # library(ggplot2) # library(ggstance) # library(grid) # library(gridExtra) # markdown etc library(knitr)
packageVersion("glmmTMB") citation("bbmle")
Load capture data.
data("ecuador")
Key columns for models run in tis script:
head(ecuador[,1:9])
NOTE! glmmTMB has recently been throwing an error even when models are Ok
(Model from glmmTMB documentation).
(m1 <- glmmTMB(count ~ mined + (1|site), ziformula = ~mined, family = poisson, data = Salamanders)) summary(m1)
Recently this error was thrown, though may be cleared up in more recent versions.
>Warning messages: 1: In Matrix::sparseMatrix(dims = c(0, 0), i = integer(0), j = integer(0), : 'giveCsparse' has been deprecated; setting 'repr = "T"' for you
According to Bolker its harmless https://stackoverflow.com/questions/67040472/warning-in-every-model-of-glmmtmb-givecsparse
Species-Location:
Time random effects:
form_glmm_no_autocorr <- formula(N ~ 1 + # Intercept Location + # Location FE; 3 level factor # session + # session FE # Location*session + # Location*session FE time_cts + # Continuous time variable (1|Specie.Code) + # species RE; DOES THIS CONVERGE? (1|Specie.Code:session) + # DOES THIS CONVERGE? <= over fit? (1|year:session) + # DOES THIS CONVERGE? <= add year:sessions:loc? (1|Specie.Code:Location) + # Species:location intercept (time_cts + 0|Specie.Code:Location) + # Species-location RS offset(log(tot_net_hours)) # offset for effort )
This is similar to Emily's original model and has not been updated with my most recent thinking
# form_glmm_ar1 <- formula(N ~ 1 + # Null # Location + # Location # time_cts + # Continuous time variable # (1|Specie.Code:Location) + # Species-level intercept # (time_cts + # 0|Specie.Code:Location) + # # ar1(as.ordered(time_cts) + 0|Specie.Code:Location) + # offset(log(tot_net_hours)) ) # Autocorrelation across
update of above (?) with additional notes and things commented out (?)
form_glmm_ar1 <- formula(N ~ 1 + # Intercept Location + # Location FE; 3 level factor # session + # session FE # Location*session + # Location*session FE time_cts + # Continuous time variable (1|Specie.Code) + # species RE; DOES THIS CONVERGE? (1|Specie.Code:session) + #DOES THIS CONVERGE? (1|year:session) + # DOES THIS CONVERGE? # (1|Specie.Code:Location) + # Species:location intercept ar1(as.ordered(time_cts) + 0|Specie.Code:Location) (time_cts + 0|Specie.Code:Location) + # Species-location RS offset(log(tot_net_hours)) # offset for effort )
None of these models have autocorrelation
# base- model fit_pois <- glmmTMB(form_glmm_no_autocorr, family = poisson, data = ecuador) # zero-inflacted poisson fit_zip <- update(fit_pois, ziformula = ~1) # negative-binomial-1 ## (not a good model but included for completeness) fit_nb1 <- update(fit_pois, family = nbinom1) #nb1 # negative-binomial-2 ## Typically the best model fit_nb2 <- update(fit_pois, family = nbinom2) #nb2 # zero-inflated negative-binomial-1 fit_zinb1 <- update(fit_pois, family = nbinom1, #nb1 ziformula = ~ 1) # zero-inflated negative-binomial-2 fit_zinb2 <- update(fit_pois, family = nbinom2, #nb2 ziformula = ~ 1)
AIC for models WITHOUT autocorrelation
AICtab(fit_pois,
fit_zip,
fit_nb1,
fit_nb2,
fit_zinb1,
fit_zinb2)
Examine nb2 model, which is the best.
summary(fit_nb2)
# Converges ## After updating the minor misspecificaion in models ## fit by ES this model is the best performing one. fam_poisson <- "poisson" fit_pois_corr <- glmmTMB(form_glmm_ar1, # Offset for effort family = fam_poisson, data = ecuador) # ZIP ## converges fit_zip_corr <- update(fit_pois_corr, ziformula = ~1) # negative-binomial 1 ## (poor model, included for completeness) ## throws error as of ## throws error as of Tue Aug 10 fit_nb1_corr <- update(fit_pois_corr, family = nbinom1) #nb1 # typically best error structure # BUT - when it has autocorrelation and ## the minor misspecificaiton in ES original models, ## it throws warnings ## throws error as of Tue Aug 10 fit_nb2_corr <- update(fit_pois_corr, family = nbinom2) #nb2 # zero-inflated negbin ## throws error as of Tue Aug 10 fit_zinb2_corr <- update(fit_pois_corr, family = nbinom2, #nb2 ziformula = ~ 1)
summary(fit_nb2_corr) summary(fit_nb2) AICtab(fit_nb2_corr, fit_pois_corr, fit_pois, fit_nb2)
fit_zip0 <- glmmTMB(N ~ 1 + # Null Location + # Location time_cts + # Continuous time variable (1|Specie.Code:Location) + # Species-level intercept (time_cts + 0|Specie.Code:Location) + offset(log(tot_net_hours)), family = poisson, ziformula = ~1, data = ecuador) # not run by EM fit_nb10 <- glmmTMB(N ~ 1 + # Null Location + # Location time_cts + # Continuous time variable (1|Specie.Code:Location) + # Species-level intercept' (time_cts + 0|Specie.Code:Location) + offset(log(tot_net_hours)), family = nbinom1, data = ecuador) fit_nb20 <- glmmTMB(N ~ 1 + # Null Location + # Location time_cts + # Continuous time variable (1|Specie.Code:Location) + # Species-level intercept' (time_cts + 0|Specie.Code:Location) + offset(log(tot_net_hours)), family = nbinom2, data = ecuador) fit_zinb20 <- glmmTMB(N ~ 1 + # Null Location + # Location time_cts + # Continuous time variable (1|Specie.Code:Location) + # Species-level intercept (time_cts + 0|Specie.Code:Location) + offset(log(tot_net_hours)), family = nbinom2, ziformula = ~ 1, data = ecuador)
CHECK AR-1 should be Specie.Code:Location but sometimes convergence problems
overdisp_fun <- function(model) { rdf <- df.residual(model) rp <- residuals(model,type="pearson") Pearson.chisq <- sum(rp^2) prat <- Pearson.chisq/rdf pval <- pchisq(Pearson.chisq, df=rdf, lower.tail=FALSE) c(chisq=Pearson.chisq,ratio=prat,rdf=rdf,p=pval) }
fit_pois_corr0 <- glmmTMB(N ~ 1 + # Null Location + # Location time_cts + # Continuous time variable (1|Specie.Code:Location) + # Species-level intercept (time_cts + 0|Specie.Code:Location) + ar1(as.ordered(time_cts) + 0|Specie.Code:Location) + # Autocorrelation across time #(1|i) + # Individual level random effect for pois-norm offset(log(tot_net_hours)), family = poisson, data = ecuador) overdisp_fun(fit_pois_corr0) ## Major Convergence problems # fit_zip_corr0 <- glmmTMB(N ~ 1 + # Null # Location + # Location # time_cts + # Continuous time variable # (1|Specie.Code:Location) + # Species-level intercept # (time_cts + 0|Specie.Code:Location) + # ar1(as.ordered(time_cts) + 0|Specie.Code:Location) + # Autocorrelation across time # #(1|i) + # Individual level random effect for pois-norm # offset(log(tot_net_hours)), # family = poisson, # ziformula = ~1, # data = ecuador) summary(ecuador$tot.yrs) i4 <- which(ecuador$tot.yrs > 1) fit_nb1_corr0 <- glmmTMB(N ~ 1 + # Null #Location + # Location time_cts + # Continuous time variable (1|Specie.Code:Location) + # Species-level intercept (time_cts + 0|Specie.Code:Location) + ar1(as.ordered(time_cts) + 0|Specie.Code) + # Autocorrelation across time offset(log(tot_net_hours)), family = nbinom1, data = ecuador) summary(fit_nb1_corr0) # fit_nb2_corr0 <- glmmTMB(N ~ 1 + # Intercept # Location + # Location # time_cts + # Continuous time variable # (1|Specie.Code:Location) + # Species-level intercept # (time_cts + 0|Specie.Code:Location) + # Species-location slopes # offset(log(tot_net_hours)), # Offset for effort # ar1(as.ordered(time_cts) + 0|Specie.Code) + # Autocorrelation across time # family = nbinom2, # data = ecuador) fit_nb2_corr0 <- glmmTMB(N ~ 1 + # Null Location + # Location time_cts + # Continuous time variable (1|Specie.Code:Location) + # Species-level intercept (time_cts + 0|Specie.Code:Location) + ar1(as.ordered(time_cts) + 0|Specie.Code) + # Autocorrelation across time offset(log(tot_net_hours)), family = nbinom2, data = ecuador) # waring: "Model convergence problem; extreme or very small eigen values detected. See vignette('troubleshooting')" fit_zinb2_corr0 <- glmmTMB(N ~ 1 + # Null Location + # Location time_cts + # Continuous time variable (1|Specie.Code:Location) + # Species-level intercept (time_cts + 0|Specie.Code:Location) + ar1(as.ordered(time_cts) + 0|Specie.Code:Location) + # Autocorrelation across time offset(log(tot_net_hours)), family = nbinom2, ziformula = ~1, data = ecuador)
# AIC aic <- AICtab(fit_pois, fit_pois_corr, fit_zip, #fit_zip_corr, #fit_nb1, #fit_nb1_corr, fit_nb2, fit_nb2_corr, fit_zinb2, #fit_zinb2_corr # note: fit_zinb2_corr throws warning base = TRUE, mnames = c("Poisson", "Poisson AR-1", "Zero-inflated Poisson", "Negative-binomial2", "Neg-binomial2 AR-1", "Zero-inflated binomial2"), logLik = TRUE) # BIC bic <- BICtab(fit_pois, fit_pois_corr, fit_zip, #fit_zip_corr, #fit_nb1, #fit_nb1_corr, fit_nb2, fit_nb2_corr, fit_zinb2, #fit_zinb2_corr # note: fit_zinb2_corr throws warning base = TRUE, mnames = c("Poisson", "Poisson AR-1", "Zero-inflated Poisson", "Negative-binomial2", "Neg-binomial2 AR-1", "Zero-inflated binomial2"), logLik = TRUE) # aic <- data.frame(dAIC = aic$dAIC, df = aic$df) # # rownames(aic) <- c("fit_nb2_corr", # "fit_zinb2_corr", # "fit_zinb2", # #"fit_zip_corr", # "fit_nb2", # "fit_pois_corr", # "fit_zip", # "fit_pois") kable(aic, caption = "AIC for different models") # bic <- data.frame(dBIC = bic$dBIC, df = bic$df) # rownames(bic) <- c("fit_nb2_corr", "fit_nb2", # "fit_zinb2", #"fit_zip_corr", # "fit_zinb2_corr", # "fit_pois_corr", "fit_zip", "fit_pois") kable(bic)
Save AIC table as .csv
write.csv(aic, "aic_table.csv") write.csv(bic, "bic_table.csv")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.