knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
library(avesdemazan)
This script
# Data cleaning library(dplyr) library(tidyr) library(reshape2) library(stringi) # Model fitting library(lme4) # glmer() for glmm library(glmmTMB) # glmm with AR-1 library(blme) # bglmer (used?) # Model evaluation library(afex) # functionall_fit() library(multcomp) library(emmeans) library(arm) # se.fit() # Result visualization library(ggplot2) library(ggstance) library(grid) library(gridExtra) # markdown etc library(knitr)
afex::all_fit
# This is the best model structure data(fit_pois_corr_FULL_RF) # This is a reduced model structure data(fit_pois_corr_only_sppLoc) # mis-specified models, for comparison data(fit_nb2_corr_misspec) # nb mis-spec data(fit_pois_corr_misspec) # pois mis-spec # check modesl diagnose(fit_pois_corr_FULL_RF) diagnose(fit_pois_corr_only_sppLoc) # has issue with Wald stats diagnose(fit_nb2_corr_misspec) diagnose(fit_pois_corr_misspec) # has issue with Wald stats
Save best model to the object best_model
.
best_model <- fit_pois_corr_FULL_RF usethis::use_data(best_model, overwrite = TRUE) usethis::use_r("best_model")
Examine best model and compare with ES-type model
fixef(best_model) fixef(fit_pois_corr_only_sppLoc) AIC(best_model,fit_pois_corr_only_sppLoc)
summary(best_model) summary(fit_pois_corr_only_sppLoc)
What is the model formula?
formula(best_model)
x <- summary(best_model) x$varcor
Get the species-specific random slopes, which are nested within Location
random-slopes specification (time_cts + 0 | Specie.Code:Location)
ar1 specification: ar1(as.ordered(time_cts) + 0 | Specie.Code:Location)
# best model ranefsx <- ranef(best_model) # reduxed model ranefsx_redox <- ranef(fit_pois_corr_only_sppLoc) #reduced model # mis-specified model ranefsx_nb_misspec <- ranef(fit_nb2_corr_misspec) ranefsx_pois_misspec <- ranef(fit_pois_corr_misspec) # overall rand eff object names(ranefsx) # zi = zero inflated; empty ranefsx$zi # location of what we want ## random slopes in "Specie.Code:Location" ## AR1 stuff also in"Specie.Code:Location" names(ranefsx$cond) # everything associate with Specie.Code:Location ## intercepts ## rand slopes ## ar1 # best model sppLocRE <- ranefsx$cond$`Specie.Code:Location` # for comparison sppLocRE_redox <- ranefsx_redox$cond$`Specie.Code:Location` sppLocRE_poismisSpec <- ranefsx_nb_misspec$cond$`Specie.Code:Location` sppLocRE_nbmisSpec <- ranefsx_pois_misspec$cond$`Specie.Code:Location` dim(sppLocRE) == dim(sppLocRE_ES) plot(sppLocRE$time_cts ~ sppLocRE_redox$time_cts, main = "Best vs. redopx") abline(a = 0, b = 1) plot(sppLocRE$time_cts ~ sppLocRE_poismisSpec$time_cts, main = "Best vs mis-spec pois") abline(a = 0, b = 1) plot(sppLocRE$time_cts ~ sppLocRE_nbmisSpec$time_cts, main = "Best vs mis-spec nb") abline(a = 0, b = 1) abline(a = 0, b = 1) # get number of species head(ranefsx$cond$`Specie.Code:Location`) N_randslopes <- dim(ranefsx$cond$`Specie.Code:Location`)[1] # 79 x 33 N_randslopes
Snapshot of the random slope
NOTE: these random slopes are deviations from the OVERALL slope parameter of the model!
head(ranefsx$cond$`Specie.Code:Location`)
vcov(best_model)
Extract SD / var using TMB::sdreport()
"After optimization of an AD model, sdreport() is used to calculate standard deviations of all model parameters"
getJointPrecision = Optional
. Return full joint precision matrix of random effects and parameters?sdreport
doc:"The full joint covariance is not returned by default, because it may require large amounts of memory. It may be obtained by specifying getJointPrecision=TRUE
, ..."bias.correct
= logical indicating if bias correction should be appliedThis takes a second
s1 <- TMB::sdreport(obj = best_model$obj, getJointPrecision = TRUE, bias.correct = TRUE)
The summary output of sdreport
are the coefficients and SE for the main effects of the model
s1
This is the same result
summary(s1,"fixed")
This is basically the sames as summary()
on the original model object
summary(s1,"fixed", p.value = TRUE)
The summary for the random effects
head(summary(s1, "random") , 10) raneff_summary <- as.data.frame(summary(s1, "random"))
There are MANY random effects coefficients
dim(summary(s1, "random"))
The output of the sdreport()
is complex
is(s1) names(s1) # covariance matrix of fixed effects s1$cov.fixed
The diagonal of the SE cov matrix is 1336 (OLD comment by NLB: "I assume the whole matrix isn't available b/c its big and/or b/c its not very useful"; not sure what I meant by this....)
x <- s1$diag.cov.random x <- as.data.frame(x) dim(x) # 1336
Compare the summary output to the covariance matrix diagonal
raneff_summary2 <- cbind(raneff_summary, x) dim(raneff_summary2) head(raneff_summary2)
s2 <- sqrt(s1$diag.cov.random) length(s2) #1336 # ? ## 1345 x 1345 test <- solve(s1$jointPrecision) 1345-1336
Extract something to get SD
#? ## output is just 0 (?) parameters <- best_model$obj$env$par parameters <- as.data.frame(parameters) head(parameters) # get parameters names ## 4 "beta", rest "b" ## 4 betas = intercept, time_cts, etc parameters$names <- names(best_model$obj$env$par) head(parameters) # get sd from "test" object parameters$sd <- sqrt(diag(test)) dim(parameters)
Create column for name of coefficient
NOTE: here we have hard-coded indices
i <- N_randslopes length(parameters$variable) # 1345 parameters$variable <- NA # create new column parameters$variable[1:4] <- c("(Intercept)","LocationMAIN","LocationMASE","time_cts") parameters$variable[5:162] <- c(rep("Intercept", i), rep("time_cts", i)) # only 162 parameters ## but parameters objects is 1345; what are the rest? 4 + N_randslopes*2 1345 - c(4 + N_randslopes*2) # 1183 # There are a LOT of parameters associated with the AR1 stuff ## about 38 spp x 31 time points 38*31 # This accounts for most of the rows in parameters; ## 1345: size of joint precisions matrix ## 4 = number of fixed effects ## 2*N_randslopes b/c intercept and slope ## AR1 stuff is 38 * 31 1345 - c(4 + 2*N_randslopes) - 38*31
Name coefficients
parameters$spp <- NA fixed_eff_names <- rep("Population", 4) #why population? rand_eff_intercept_names <- rownames(ranef(best_model)[[1]]$`Specie.Code:Location`) rand_eff_slope_names <- rand_eff_intercept_names parameters$spp[1:162] <- c(fixed_eff_names, rand_eff_intercept_names, rand_eff_slope_names)
loop over x and combine y into sd
TODO: check - things are hard coded here - does that matter?
I think ES codded the delta method here
# slope estimate = -0.028207, SE = 0.009887 test[4,4] # SD for slope sqrt(test[4,4])
parameters$aggregate_sd <- NA # Slopes start at 84 head(parameters[75:88,]) N_randslopes+4 # last intercept N_randslopes+4 +1 #first slope slope.i <- 4 # note: i starts at 0 for (i in 0:78) { # combined SD parameters$aggregate_sd[84+i] <- sqrt(test[slope.i,slope.i] + # fixef slope var test[84+i,84+i] + # ranefslope var 2*test[84+i,slope.i]) # fixef-ranef covariance }
TODO: check - things are hard coded here - does that matter?
N_randslopes+4 + 1 #first slope 2*N_randslopes + 4 #last slope # get subst that we want subset_for_merge <- parameters[84:162,c("spp","aggregate_sd")] colnames(subset_for_merge)[1] <- "id" sd_int <- s2[1:79] sd_slope <- s2[80:158] slopes <- ranef(best_model)[[1]]$`Specie.Code:Location` # 79 x 2 dim(slopes) #ranef(best_model)[[1]]$`Specie.Code` # 38 x 33 # 79 * 2 + 38 * 33 s0 <- best_model$sdr #time coefficient # ? individual time slope? slopes$time_cts_tot <- summary(best_model)[[6]]$cond[4,1] + slopes$time_cts slopes$time_sd <- sd_slope slopes$time_lb <- slopes$time_cts_tot - 1.96*slopes$time_sd slopes$time_ub <- slopes$time_cts_tot + 1.96*slopes$time_sd slopes$spp <- substring(rownames(slopes), 1, 4) slopes$loc <- substring(rownames(slopes), 6, 9) slopes$id <- rownames(slopes) slopes.unique <- slopes[!duplicated(slopes$spp),] slopes$spp2 <- ordered(slopes$spp, levels = slopes.unique[order(slopes.unique$time_cts_tot),]$spp) slopes$id2 <- ordered(slopes$id, levels = slopes[order(slopes$time_cts_tot),]$id) slopes <- left_join(slopes, subset_for_merge, by = "id") # calculate lower (lb) and upper (ub) bound of CIs ## NOTE: use slopes$time_cts_tot (as shown and as was in orig script) ### NOTE time_cts slopes$time_lb2 <- slopes$time_cts_tot - 1.96*slopes$aggregate_sd slopes$time_ub2 <- slopes$time_cts_tot + 1.96*slopes$aggregate_sd # ggplot(data = slopes, aes(x = time_cts, y = slopes$spp2)) + # geom_point(aes(x = time_cts, y = spp2)) + # geom_segment(aes(x = time_lb, xend = time_ub, y = spp2, yend = spp2)) + # facet_grid(~loc) + # geom_vline(aes(xintercept = 0, color = "No Change")) + # geom_vline(aes(xintercept = summary(best_model)[[6]]$cond[4,1], color = "Population-level estimate")) + # #geom_rect(aes(xmin = -0.016779 - 1.96*0.003354, xmax = -0.016779 + 1.96*0.003354, ymin = -Inf, ymax = Inf), fill = "blue", alpha = 0.2) + # xlab("Species-level time coefficient \n") + ylab("Species Code") + # #scale_colour_manual(values = c("Population-level estimate" = "blue")) + # scale_colour_manual(values =c("No Change" = "red", "Population-level estimate" = "blue")) + # #scale_fill_manual(values = c("blue" = "blue")) + # theme(panel.spacing = unit(1, "lines")) + # theme(legend.position = "bottom", legend.title = element_blank(), legend.background = element_blank(), legend.box.background = element_rect(colour = "black")) # habita labels for plots ## main labels # habitat_labels <- c("Secondary Forest\n(LLAV)", "Primary Forest\n(MASE)", "Introduced Forest\n(MAIN)") #habitat_labels <- c("Montane shrubland\n(SHRUB)", "Native Forest\n(NATIVE)", "Mixed Native & Introduced Forest\n(MIXED)") habitat_labels <- c("SHRUB", "NATIVE", "MIXED") ## vector names # names(habitat_labels) <- c("LLAV","MASE", "MAIN") names(habitat_labels) <- c("SHRUB","NATIVE", "MIXED") ## ## Species list data("Ecuador_Species_List") spp_trt <- Ecuador_Species_List spp_trt$Species_SciName <- gsub("[\\(\\)]", "", regmatches(spp_trt$Species, gregexpr("\\(.*?\\)", spp_trt$Species))) spp_trt$Species_SciName <- paste0(substr(spp_trt$Species_SciName,1,1),". ", sub("^\\S+\\s+", '', spp_trt$Species_SciName)) i.col <- which(colnames(spp_trt) %in% c("Specie.Code", "Species_SciName")) spp_trt[,i.col[1]] <- as.character(spp_trt[,i.col[1]]) slopes.unique <- dplyr::left_join(slopes.unique, spp_trt[,i.col], by = c("spp" = "Specie.Code")) slopes <- dplyr::left_join(slopes, spp_trt[,i.col], by = c("spp" = "Specie.Code")) slopes$Species_SciName2 <- ordered(slopes$Species_SciName, levels = slopes.unique[order(slopes.unique$time_cts),]$Species_SciName) slopes$Sig <- ifelse(slopes$time_ub2 < 0, "Significant", "Not Significant")
Site descriptions from MS "We placed three sampling sites in areas with unique habitat types. These included: (1) native, mature secondary, subtropical moist broadleaf forest (NATIVE) located in Mazán; (2) mixed native and non-native forest (MIXED) also located in Mazán; and (3) native montane shrubland (SHRUB) located in Llaviuco Valley."
Change labels from those used in earlier drafts of MS
slopes$loc.orig <- slopes$loc slopes$loc <- gsub("LLAV","SHRUB",slopes$loc) slopes$loc <- gsub("MAIN","MIXED",slopes$loc) slopes$loc <- gsub("MASE","NATIVE",slopes$loc) # set order slopes$loc <- factor(slopes$loc, levels = c("NATIVE","MIXED","SHRUB")) slopes$loc <- factor(slopes$loc, levels = c("SHRUB","MIXED","NATIVE"))
summary(slopes$loc)
Save slopes
write.csv(slopes, file = "species_specific_slops.csv")
slopes
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.