knitr::opts_chunk$set(echo = TRUE)
#install.packages("DHARMa") library(DHARMa)
https://stats.stackexchange.com/questions/417680/dealing-with-overdispersed-negative-binomial-using-glmmtmb
dispfun <- function(m) { r <- residuals(m,type="pearson") n <- df.residual(m) dsq <- sum(r^2) c(dsq=dsq,n=n,disp=dsq/n) } dispfun(fit_nb2_corr_8plus)
https://bbolker.github.io/mixedmodels-misc/glmmFAQ.html#testing-for-overdispersioncomputing-overdispersion-factor
overdisp_fun <- function(model) { rdf <- df.residual(model) rp <- residuals(model,type="pearson") Pearson.chisq <- sum(rp^2) prat <- Pearson.chisq/rdf pval <- pchisq(Pearson.chisq, df=rdf, lower.tail=FALSE) c(chisq=Pearson.chisq,ratio=prat,rdf=rdf,p=pval) }
form_glmm_ar1_ES <- formula(N ~ 1 + # Null Location + # Location time_cts + # Continuous time variable (1|Specie.Code:Location) + # Species-level intercept (time_cts + 0|Specie.Code:Location) + ar1(as.ordered(time_cts) + # Autocorrelation across 0|Specie.Code) + offset(log(tot_net_hours)) )
fit_nb2_corr_all_ES <- glmmTMB(form_glmm_ar1_ES, family = nbinom2, data = ecuador[, ])
# no warnings # overdisp parameter: 4.84 # "disp" 0.7696327 summary(fit_nb2_corr_all_ES) dispfun(fit_nb2_corr_all_ES) diagnose_hessian(fit_nb2_corr_all_ES) #diagnose_vcov(fit_nb2_corr_all_ES)
form_glmm_ar1 <- formula(N ~ 1 + # Null Location + # Location time_cts + # Continuous time variable (1|Specie.Code:Location) + # Species-level intercept (1|year:session) + (1|year:session:Location) + (time_cts + 0|Specie.Code:Location) + ar1(as.ordered(time_cts) + # Autocorrelation across 0|Specie.Code:Location) + offset(log(tot_net_hours)) )
summary(ecuador$tot.yrs) table(ecuador$tot.yrs ) i5 <- which(ecuador$tot.yrs > 4) i6 <- which(ecuador$tot.yrs > 5) i7 <- which(ecuador$tot.yrs > 6) i8 <- which(ecuador$tot.yrs > 7)
# 1 warnings ## Model convergence problem; non-positive-definite Hessian matrix. ## Overdispersion: 1.5e+06 fit_nb2_corr_all <- glmmTMB(form_glmm_ar1, family = nbinom2, data = ecuador[, ]) diagnose_hessian(fit_nb2_corr_all) # betad #diagnose_vcov(fit_nb2_corr_all_ES) # 2 warnings ## Model convergence problem; non-positive-definite Hessian matrix. ## Model convergence problem; false convergence (8) ## overdipserion: 3.78e+06 fit_nb2_corr_5plus <- glmmTMB(form_glmm_ar1, family = nbinom2, data = ecuador[i5, ]) # 2 warnings ## as above # overdispersion: 6.78e+06 fit_nb2_corr_6plus <- glmmTMB(form_glmm_ar1, family = nbinom2, data = ecuador[i6, ]) # 2 warnings # 1.65e+06 fit_nb2_corr_7plus <- glmmTMB(form_glmm_ar1, family = nbinom2, data = ecuador[i7, ]) # No warnings # disp parameter 4.7e+06 # "disp" 0.4432844 fit_nb2_corr_8plus <- glmmTMB(form_glmm_ar1, family = nbinom2, data = ecuador[i8, ]) dispfun(fit_nb2_corr_8plus) # 0.4432844 diagnose_hessian(fit_nb2_corr_8plus) # betad
# 1 warning ## Model convergence problem; non-positive-definite Hessian matrix. ## Overdispersion: 2.28e-11 fit_nb1_corr_all <- glmmTMB(form_glmm_ar1, family = nbinom1, data = ecuador[, ]) #diagnose_hessian(fit_nb1_corr_all) # betad #diagnose_vcov(fit_nb2_corr_all_ES)
form_glmm_ar1 <- formula(N ~ 1 + # Null Location + # Location time_cts + # Continuous time variable (1|Specie.Code) + (1|Specie.Code:Location) + # Species-level intercept (1|year:session) + (1|year:session:Location) + (time_cts + 0|Specie.Code:Location) + ar1(as.ordered(time_cts) + # Autocorrelation across 0|Specie.Code:Location) + offset(log(tot_net_hours)) )
ranef(fit_pos_corr_all)
# 0 warnings fit_pos_corr_all <- glmmTMB(form_glmm_ar1, family = poisson, data = ecuador[, ]) diagnose_hessian(fit_pos_corr_all) dispfun(fit_pos_corr_all) # 0.4964196 overdisp_fun(fit_pos_corr_all) #0.4964196 # dispersion = 1.7967, p-value = 0.072 DHARMa::testDispersion(fit_pos_corr_all) # dispersion = 0.49642 DHARMa::testDispersion(fit_pos_corr_all, type = "PearsonChisq")
simulationOutput <- simulateResiduals(fittedModel = fit_pos_corr_all, refit = F, plot = F) plot(simulationOutput) ecuador$resids <- resid(simulationOutput)
plotQQunif (left panel) creates a qq-plot to detect overall deviations from the expected distribution, by default with added tests for correct distribution (KS test), dispersion and outliers. Note that outliers in DHARMa are values that are by default defined as values outside the simulation envelope, not in terms of a particular quantile. Thus, which values will appear as outliers will depend on the number of simulations. If you want outliers in terms of a particuar quantile, you can use the outliers() function.
plotResiduals (right panel) produces a plot of the residuals against the predicted value (or alternatively, other variable). Simulation outliers (data points that are outside the range of simulated values) are highlighted as red stars. These points should be carefully interpreted, because we actually don’t know “how much” these values deviate from the model expectation. Note also that the probability of an outlier depends on the number of simulations, so whether the existence of outliers is a reason for concern depends also on the number of simulations.
DHARMa::testDispersion(simulationOutput) DHARMa::testUniformity(simulationOutput) DHARMa::testZeroInflation(simulationOutput)
library(ggplot2) ggplot(data = ecuador, aes(y = resids, x = time_cts)) + geom_point() + facet_wrap(vars(Location))
ggplot(data = ecuador, aes(y = resids, x = time_cts)) + geom_point() + facet_wrap(vars(session))
ggplot(data = ecuador, aes(y = resids, x = time_cts, color = Location)) + geom_point() + facet_wrap(vars(Species)) + geom_smooth(method = "lm", se = T)
not better
form_glmm_ar1_poisnorm <- update(form_glmm_ar1_ES, . ~ . + (1|i))
fit_pos_corr_poisnorm <- glmmTMB(form_glmm_ar1_poisnorm, family = poisson, data = ecuador[, ]) dispfun(fit_pos_corr_poisnorm) # 0.5197247 # dispersion = 1.7967, p-value = 0.072 DHARMa::testDispersion(fit_pos_corr_poisnorm) DHARMa::testUniformity(fit_pos_corr_all)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.