knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

Introduction

This script examines different error structures (Poisson, negative binomial, etc.) and uses information criteria (AIC and BIC) to determine which are most promising.

Once the overall error structure has been assessed, the fixed effects and random effect can be assessed; these tasks are each done in a separate script.

Preliminaries

library(avesdemazan)

# Model fitting
library(lme4)       # glmer() for glmm
library(glmmTMB)    # glmm with AR-1  
# library(blme)       # bglmer (used?)

# Model evaluation
# library(afex)       # functionall_fit()
# library(multcomp)   
# library(emmeans)
library(bbmle)      # aictab
library(arm)        # se.fit()

# Result visualization
# library(ggplot2)    
# library(ggstance)  
# library(grid)
# library(gridExtra)  

#  markdown etc
library(knitr)    

Get current version / citation info for key packages

packageVersion("glmmTMB")
citation("bbmle")

Load data

Load capture data.

data("ecuador")

Key columns for models run in tis script:

head(ecuador[,1:9])

Example data

NOTE! glmmTMB has recently been throwing an error even when models are Ok

(Model from glmmTMB documentation).

(m1 <- glmmTMB(count ~ mined +
                      (1|site),
                     ziformula = ~mined,
                     family = poisson, 
                     data = Salamanders))
summary(m1)

Recently this error was thrown, though may be cleared up in more recent versions.

>Warning messages:
1: In Matrix::sparseMatrix(dims = c(0, 0), i = integer(0), j = integer(0),  :
  'giveCsparse' has been deprecated; setting 'repr = "T"' for you

According to Bolker its harmless https://stackoverflow.com/questions/67040472/warning-in-every-model-of-glmmtmb-givecsparse

Species-specifc models of captures

Formulas

Possible random effects

Species-Location:

Time random effects:

Formula 1: No Autocorrelation structure

form_glmm_no_autocorr <- formula(N ~ 1 +          # Intercept
                      Location +                  # Location FE; 3 level factor
                      # session +                 # session FE
                      # Location*session  +       # Location*session  FE
                      time_cts +                  # Continuous time variable
                      (1|Specie.Code) +           # species RE; DOES THIS CONVERGE?
                      (1|Specie.Code:session) +   # DOES THIS CONVERGE? <= over fit?
                      (1|year:session) +          # DOES THIS CONVERGE? <= add year:sessions:loc?
                      (1|Specie.Code:Location) +  # Species:location intercept
                      (time_cts + 
                       0|Specie.Code:Location) +   # Species-location RS
                      offset(log(tot_net_hours))   # offset for effort
                      )

This is similar to Emily's original model and has not been updated with my most recent thinking

# form_glmm_ar1 <- formula(N ~ 1 +                                    # Null
#                            Location +                               # Location
#                            time_cts +                             # Continuous time variable
#                            (1|Specie.Code:Location) +             # Species-level intercept
#                            (time_cts + 
#                                 0|Specie.Code:Location) +
#                            
#                            ar1(as.ordered(time_cts) + 0|Specie.Code:Location) +
#                            offset(log(tot_net_hours)) ) # Autocorrelation across 

update of above (?) with additional notes and things commented out (?)

form_glmm_ar1 <- formula(N ~ 1 +          # Intercept
                      Location +                  # Location FE; 3 level factor
                      # session +                 # session FE
                      # Location*session  +       # Location*session  FE
                      time_cts +                  # Continuous time variable
                      (1|Specie.Code) +         # species RE; DOES THIS CONVERGE?
                      (1|Specie.Code:session) +         #DOES THIS CONVERGE?
                      (1|year:session) +         # DOES THIS CONVERGE? 
                      # (1|Specie.Code:Location) +  # Species:location intercept
                      ar1(as.ordered(time_cts) + 0|Specie.Code:Location)

                      (time_cts + 
                       0|Specie.Code:Location) +   # Species-location RS

                      offset(log(tot_net_hours))   # offset for effort
                      )

Standard GLMMs - no autocorrelation

None of these models have autocorrelation

# base- model
fit_pois <- glmmTMB(form_glmm_no_autocorr,         
                      family = poisson,
                      data = ecuador)

# zero-inflacted poisson
fit_zip <- update(fit_pois,
                  ziformula = ~1)

# negative-binomial-1
## (not a good model but included for completeness)
fit_nb1 <- update(fit_pois,
                  family = nbinom1) #nb1

# negative-binomial-2
## Typically the best model
fit_nb2 <- update(fit_pois,
                  family = nbinom2) #nb2

# zero-inflated negative-binomial-1
fit_zinb1 <- update(fit_pois,
                    family = nbinom1, #nb1
                     ziformula = ~ 1)

# zero-inflated negative-binomial-2
fit_zinb2 <- update(fit_pois,
                    family = nbinom2, #nb2
                     ziformula = ~ 1)

AIC for models WITHOUT autocorrelation

AICtab(fit_pois,
       fit_zip,
       fit_nb1,
       fit_nb2,
       fit_zinb1,
       fit_zinb2)

Examine nb2 model, which is the best.

summary(fit_nb2)

AR-1 models

# Converges
## After updating the minor misspecificaion in models
## fit by ES this model is the best performing one.
fam_poisson <- "poisson"
fit_pois_corr <- glmmTMB(form_glmm_ar1,             # Offset for effort
                      family = fam_poisson,
                      data = ecuador)

# ZIP
## converges
fit_zip_corr <- update(fit_pois_corr,
                  ziformula = ~1)

# negative-binomial 1
## (poor model, included for completeness)
## throws error as of ## throws error as of Tue Aug 10
fit_nb1_corr <- update(fit_pois_corr,
                  family = nbinom1) #nb1

# typically best error structure
# BUT - when it has autocorrelation and
##      the minor misspecificaiton in ES original models, 
##      it throws warnings
## throws error as of Tue Aug 10
fit_nb2_corr <- update(fit_pois_corr,
                  family = nbinom2) #nb2

# zero-inflated negbin
## throws error as of Tue Aug 10
fit_zinb2_corr <- update(fit_pois_corr,
                    family = nbinom2, #nb2
                     ziformula = ~ 1)
summary(fit_nb2_corr)
summary(fit_nb2)

AICtab(fit_nb2_corr, 
       fit_pois_corr,
       fit_pois,
       fit_nb2)
fit_zip0 <- glmmTMB(N ~ 1 +                                                    # Null
                     Location +                                               # Location
                     time_cts +                                               # Continuous time variable
                     (1|Specie.Code:Location) +                               # Species-level intercept
                     (time_cts + 0|Specie.Code:Location) +
                     offset(log(tot_net_hours)),
                   family = poisson,
                   ziformula = ~1,
                   data = ecuador)

# not run by EM
fit_nb10 <- glmmTMB(N ~ 1 +                                                    # Null
                     Location +                                               # Location
                     time_cts +                                               # Continuous time variable
                     (1|Specie.Code:Location) +                               # Species-level intercept'
                     (time_cts + 0|Specie.Code:Location) +
                     offset(log(tot_net_hours)),
                   family = nbinom1,
                   data = ecuador)

fit_nb20 <- glmmTMB(N ~ 1 +                                                    # Null
                     Location +                                               # Location
                     time_cts +                                               # Continuous time variable
                     (1|Specie.Code:Location) +                               # Species-level intercept'
                     (time_cts + 0|Specie.Code:Location) +
                     offset(log(tot_net_hours)),
                   family = nbinom2,
                   data = ecuador)

fit_zinb20 <- glmmTMB(N ~ 1 +                                                  # Null
                       Location +                                             # Location
                       time_cts +                                             # Continuous time variable
                       (1|Specie.Code:Location) +                             # Species-level intercept
                       (time_cts + 0|Specie.Code:Location) +
                       offset(log(tot_net_hours)),
                     family = nbinom2,
                     ziformula = ~ 1,
                     data = ecuador)

GLMMs + AR(1)

CHECK AR-1 should be Specie.Code:Location but sometimes convergence problems

overdisp_fun <- function(model) {
    rdf <- df.residual(model)
    rp <- residuals(model,type="pearson")
    Pearson.chisq <- sum(rp^2)
    prat <- Pearson.chisq/rdf
    pval <- pchisq(Pearson.chisq, df=rdf, lower.tail=FALSE)
    c(chisq=Pearson.chisq,ratio=prat,rdf=rdf,p=pval)
}
fit_pois_corr0 <- glmmTMB(N ~ 1 +                                                   # Null
                           Location +                                              # Location
                           time_cts +                                              # Continuous time variable
                           (1|Specie.Code:Location) +                              # Species-level intercept
                           (time_cts + 0|Specie.Code:Location) +
                           ar1(as.ordered(time_cts) + 0|Specie.Code:Location) +   # Autocorrelation across time
                           #(1|i) +                                               # Individual level random effect for pois-norm
                           offset(log(tot_net_hours)),
                         family = poisson,
                         data = ecuador)
overdisp_fun(fit_pois_corr0)

## Major Convergence problems
# fit_zip_corr0 <- glmmTMB(N ~ 1 +                                                    # Null
#                           Location +                                               # Location
#                           time_cts +                                               # Continuous time variable
#                           (1|Specie.Code:Location) +                               # Species-level intercept
#                           (time_cts + 0|Specie.Code:Location) +
#                           ar1(as.ordered(time_cts) + 0|Specie.Code:Location) +     # Autocorrelation across time
#                           #(1|i) +                                                  # Individual level random effect for pois-norm
#                           offset(log(tot_net_hours)),
#                         family = poisson,
#                         ziformula = ~1,
#                         data = ecuador)

summary(ecuador$tot.yrs)

i4 <- which(ecuador$tot.yrs > 1)

fit_nb1_corr0 <- glmmTMB(N ~ 1 +                                                    # Null
                          #Location +                                               # Location
                          time_cts +                                               # Continuous time variable
                          (1|Specie.Code:Location) +                               # Species-level intercept
                          (time_cts + 0|Specie.Code:Location) +
                          ar1(as.ordered(time_cts) + 0|Specie.Code) +              # Autocorrelation across time
                          offset(log(tot_net_hours)),
                        family = nbinom1,
                        data = ecuador)

summary(fit_nb1_corr0)

# fit_nb2_corr0 <- glmmTMB(N ~ 1 +                                   #  Intercept
#                       Location +                                  # Location
#                       time_cts +                                  # Continuous time variable
#                       (1|Specie.Code:Location) +                  # Species-level intercept
#                       (time_cts + 0|Specie.Code:Location) +       # Species-location slopes
#                       offset(log(tot_net_hours)),                 # Offset for effort
#                       ar1(as.ordered(time_cts) + 0|Specie.Code) + # Autocorrelation across time
#                       family = nbinom2,
#                       data = ecuador)


fit_nb2_corr0 <- glmmTMB(N ~ 1 +                                                    # Null
                          Location +                                               # Location
                          time_cts +                                               # Continuous time variable
                          (1|Specie.Code:Location) +                               # Species-level intercept
                          (time_cts + 0|Specie.Code:Location) +
                          ar1(as.ordered(time_cts) + 0|Specie.Code) + # Autocorrelation across time
                          offset(log(tot_net_hours)),
                        family = nbinom2,
                        data = ecuador)

# waring: "Model convergence problem; extreme or very small eigen values detected. See vignette('troubleshooting')"
fit_zinb2_corr0 <- glmmTMB(N ~ 1 +                                                  # Null
                            Location +                                             # Location
                            time_cts +                                             # Continuous time variable
                            (1|Specie.Code:Location) +                             # Species-level intercept
                            (time_cts + 0|Specie.Code:Location) +
                            ar1(as.ordered(time_cts) + 0|Specie.Code:Location) +   # Autocorrelation across time
                            offset(log(tot_net_hours)),
                          family = nbinom2,
                          ziformula = ~1,
                          data = ecuador)

Evalute model fit

# AIC
aic <- AICtab(fit_pois,
              fit_pois_corr, 
              fit_zip, 
              #fit_zip_corr,
              #fit_nb1,
              #fit_nb1_corr,
              fit_nb2, 
              fit_nb2_corr, 
              fit_zinb2,
              #fit_zinb2_corr # note: fit_zinb2_corr throws warning
              base = TRUE,
              mnames = c("Poisson",
                         "Poisson AR-1",
                         "Zero-inflated Poisson",
                         "Negative-binomial2",
                         "Neg-binomial2 AR-1",
                         "Zero-inflated binomial2"),
              logLik = TRUE)

# BIC
bic <- BICtab(fit_pois,
              fit_pois_corr, 
              fit_zip, 
              #fit_zip_corr,
              #fit_nb1,
              #fit_nb1_corr,
              fit_nb2, 
              fit_nb2_corr, 
              fit_zinb2,
              #fit_zinb2_corr # note: fit_zinb2_corr throws warning
              base = TRUE,
              mnames = c("Poisson",
                         "Poisson AR-1",
                         "Zero-inflated Poisson",
                         "Negative-binomial2",
                         "Neg-binomial2 AR-1",
                         "Zero-inflated binomial2"),
              logLik = TRUE)

# aic <- data.frame(dAIC = aic$dAIC, df = aic$df)
# 
# rownames(aic) <- c("fit_nb2_corr",
#                    "fit_zinb2_corr",
#                    "fit_zinb2", 
#                    #"fit_zip_corr", 
#                    "fit_nb2",
#                    "fit_pois_corr", 
#                    "fit_zip", 
#                    "fit_pois")
kable(aic, caption = "AIC for different models")

# bic <- data.frame(dBIC = bic$dBIC, df = bic$df)
# rownames(bic) <- c("fit_nb2_corr", "fit_nb2",
#                    "fit_zinb2", #"fit_zip_corr", 
#                    "fit_zinb2_corr",
#                    "fit_pois_corr", "fit_zip", "fit_pois")
kable(bic)

Save AIC table as .csv

write.csv(aic, "aic_table.csv")
write.csv(bic, "bic_table.csv")


brouwern/avesdemazan documentation built on July 26, 2022, 8:38 p.m.