knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
library(avesdemazan)

Introduction

This script

  1. Re-fits the best model
  2. Extracts the random slopes

Preliminaries

Load packages

# Data cleaning
library(dplyr)      
library(tidyr)   
library(reshape2)
library(stringi)  


# Model fitting
library(lme4)       # glmer() for glmm
library(glmmTMB)    # glmm with AR-1  
library(blme)       # bglmer (used?)

# Model evaluation
library(afex)       # functionall_fit()
library(multcomp)   
library(emmeans)
library(arm)        # se.fit()

# Result visualization
library(ggplot2)    
library(ggstance)  
library(grid)
library(gridExtra)  

#  markdown etc
library(knitr)     

afex::all_fit

Best model

# This is the best model structure
data(fit_pois_corr_FULL_RF)

# This is a reduced model structure
data(fit_pois_corr_only_sppLoc)

# mis-specified models, for comparison
data(fit_nb2_corr_misspec)  # nb mis-spec
data(fit_pois_corr_misspec) # pois mis-spec

# check modesl
diagnose(fit_pois_corr_FULL_RF)
diagnose(fit_pois_corr_only_sppLoc) # has issue with Wald stats
diagnose(fit_nb2_corr_misspec)
diagnose(fit_pois_corr_misspec) # has issue with Wald stats

Save best model to the object best_model.

best_model <- fit_pois_corr_FULL_RF
usethis::use_data(best_model,
                  overwrite = TRUE)
usethis::use_r("best_model")

Examine best model

Examine best model and compare with ES-type model

fixef(best_model)
fixef(fit_pois_corr_only_sppLoc)

AIC(best_model,fit_pois_corr_only_sppLoc)
summary(best_model)
summary(fit_pois_corr_only_sppLoc)

Look at random effects

What is the model formula?

formula(best_model)
x <- summary(best_model)
x$varcor

Get the species-specific random slopes, which are nested within Location

random-slopes specification (time_cts + 0 | Specie.Code:Location)

ar1 specification: ar1(as.ordered(time_cts) + 0 | Specie.Code:Location)

# best model
ranefsx <- ranef(best_model)

# reduxed model
ranefsx_redox <- ranef(fit_pois_corr_only_sppLoc) #reduced model

# mis-specified model
ranefsx_nb_misspec <- ranef(fit_nb2_corr_misspec)
ranefsx_pois_misspec  <- ranef(fit_pois_corr_misspec)



# overall rand eff object
names(ranefsx)

# zi = zero inflated; empty
ranefsx$zi

# location of what we want
## random slopes in "Specie.Code:Location" 
## AR1 stuff     also in"Specie.Code:Location" 
names(ranefsx$cond)

# everything associate with Specie.Code:Location
## intercepts
## rand slopes
## ar1

# best model
sppLocRE    <- ranefsx$cond$`Specie.Code:Location`

# for comparison
sppLocRE_redox <- ranefsx_redox$cond$`Specie.Code:Location`
sppLocRE_poismisSpec    <- ranefsx_nb_misspec$cond$`Specie.Code:Location`
sppLocRE_nbmisSpec     <- ranefsx_pois_misspec$cond$`Specie.Code:Location`


dim(sppLocRE) == dim(sppLocRE_ES)

plot(sppLocRE$time_cts ~ sppLocRE_redox$time_cts, main = "Best vs. redopx")
abline(a = 0, b = 1)
plot(sppLocRE$time_cts ~ sppLocRE_poismisSpec$time_cts, 
     main = "Best vs mis-spec pois")
abline(a = 0, b = 1)
plot(sppLocRE$time_cts ~ sppLocRE_nbmisSpec$time_cts, 
     main = "Best vs mis-spec nb")
abline(a = 0, b = 1)

abline(a = 0, b = 1)

# get number of species
head(ranefsx$cond$`Specie.Code:Location`)
N_randslopes <- dim(ranefsx$cond$`Specie.Code:Location`)[1] # 79 x 33
N_randslopes

Snapshot of the random slope

NOTE: these random slopes are deviations from the OVERALL slope parameter of the model!

head(ranefsx$cond$`Specie.Code:Location`)
vcov(best_model)

Process random slopes

Extract SD / var using TMB::sdreport()

"After optimization of an AD model, sdreport() is used to calculate standard deviations of all model parameters"

Get TMB sd report

This takes a second

s1 <- TMB::sdreport(obj = best_model$obj, 
                    getJointPrecision = TRUE, 
                    bias.correct = TRUE)

The summary output of sdreport are the coefficients and SE for the main effects of the model

s1

This is the same result

summary(s1,"fixed")

This is basically the sames as summary() on the original model object

summary(s1,"fixed", p.value = TRUE)

The summary for the random effects

head(summary(s1, "random") , 10)

raneff_summary <- as.data.frame(summary(s1, "random"))

There are MANY random effects coefficients

dim(summary(s1, "random"))

The output of the sdreport() is complex

is(s1)
names(s1)

# covariance matrix of fixed effects
s1$cov.fixed

The diagonal of the SE cov matrix is 1336 (OLD comment by NLB: "I assume the whole matrix isn't available b/c its big and/or b/c its not very useful"; not sure what I meant by this....)

x <- s1$diag.cov.random
x <- as.data.frame(x)
dim(x) # 1336

Compare the summary output to the covariance matrix diagonal

raneff_summary2 <- cbind(raneff_summary, x)
dim(raneff_summary2)
head(raneff_summary2)

Convert

s2 <- sqrt(s1$diag.cov.random)
length(s2) #1336

# ? 
## 1345 x 1345
test <- solve(s1$jointPrecision)

1345-1336

Extract something to get SD

#?
## output is just 0 (?)
parameters <- best_model$obj$env$par
parameters <- as.data.frame(parameters)
head(parameters) 

# get parameters names
## 4 "beta", rest "b"
## 4 betas = intercept, time_cts, etc
parameters$names <- names(best_model$obj$env$par)
head(parameters)

# get sd from "test" object 
parameters$sd <- sqrt(diag(test))

dim(parameters)

Create column for name of coefficient

NOTE: here we have hard-coded indices

i <- N_randslopes
length(parameters$variable) # 1345
parameters$variable <- NA  # create new column
parameters$variable[1:4] <- c("(Intercept)","LocationMAIN","LocationMASE","time_cts")

parameters$variable[5:162] <-   c(rep("Intercept", i), 
                                  rep("time_cts", i))
# only 162 parameters
## but parameters objects is 1345; what are the rest?
4 + N_randslopes*2

1345 - c(4 + N_randslopes*2) # 1183

# There are a LOT of parameters associated with the AR1 stuff
## about 38 spp x 31 time points
38*31

# This accounts for most of the rows in parameters; 
## 1345: size of joint precisions matrix
## 4 = number of fixed effects
## 2*N_randslopes b/c intercept and slope
## AR1 stuff is 38 * 31
1345 - c(4 + 2*N_randslopes) - 38*31

Name coefficients

parameters$spp <- NA
fixed_eff_names <- rep("Population", 4) #why population?
rand_eff_intercept_names <- rownames(ranef(best_model)[[1]]$`Specie.Code:Location`)
rand_eff_slope_names <- rand_eff_intercept_names
parameters$spp[1:162] <- c(fixed_eff_names, 
                           rand_eff_intercept_names,    
                           rand_eff_slope_names)

loop over x and combine y into sd

TODO: check - things are hard coded here - does that matter?

I think ES codded the delta method here

# slope estimate = -0.028207, SE = 0.009887
test[4,4] # SD for slope
sqrt(test[4,4]) 
parameters$aggregate_sd <- NA

# Slopes start at 84
head(parameters[75:88,])
N_randslopes+4 # last intercept
N_randslopes+4 +1  #first slope

slope.i <- 4

# note: i starts at 0
for (i in 0:78)
{ 

  # combined SD                  
  parameters$aggregate_sd[84+i] <- sqrt(test[slope.i,slope.i] + # fixef slope var
                                                test[84+i,84+i] +     # ranefslope var  
                                                2*test[84+i,slope.i]) # fixef-ranef covariance
}

Extract SE

TODO: check - things are hard coded here - does that matter?

N_randslopes+4 + 1  #first slope
2*N_randslopes + 4   #last slope

# get subst that we want
subset_for_merge <- parameters[84:162,c("spp","aggregate_sd")]
colnames(subset_for_merge)[1] <- "id"

sd_int   <- s2[1:79]
sd_slope <- s2[80:158]

slopes <- ranef(best_model)[[1]]$`Specie.Code:Location` # 79 x 2
dim(slopes)
#ranef(best_model)[[1]]$`Specie.Code` # 38 x 33
# 79 * 2 + 38 * 33

s0 <- best_model$sdr

                       #time coefficient                      # ? individual time slope?
slopes$time_cts_tot <- summary(best_model)[[6]]$cond[4,1] + slopes$time_cts
slopes$time_sd  <- sd_slope
slopes$time_lb  <- slopes$time_cts_tot - 1.96*slopes$time_sd
slopes$time_ub  <- slopes$time_cts_tot + 1.96*slopes$time_sd

slopes$spp <- substring(rownames(slopes), 1, 4)
slopes$loc <- substring(rownames(slopes), 6, 9)
slopes$id <- rownames(slopes)

slopes.unique <- slopes[!duplicated(slopes$spp),]

slopes$spp2 <- ordered(slopes$spp, levels = slopes.unique[order(slopes.unique$time_cts_tot),]$spp)
slopes$id2 <- ordered(slopes$id, levels = slopes[order(slopes$time_cts_tot),]$id)

slopes <- left_join(slopes, subset_for_merge, by = "id")

# calculate lower (lb) and upper (ub) bound of CIs
## NOTE: use slopes$time_cts_tot (as shown and as was in orig script)
###      NOTE time_cts
slopes$time_lb2  <- slopes$time_cts_tot - 1.96*slopes$aggregate_sd
slopes$time_ub2  <- slopes$time_cts_tot + 1.96*slopes$aggregate_sd

# ggplot(data = slopes, aes(x = time_cts, y = slopes$spp2)) +
#   geom_point(aes(x = time_cts, y = spp2)) +
#   geom_segment(aes(x = time_lb, xend = time_ub, y = spp2, yend = spp2)) +
#   facet_grid(~loc) +
#   geom_vline(aes(xintercept = 0, color = "No Change")) +
#   geom_vline(aes(xintercept = summary(best_model)[[6]]$cond[4,1], color = "Population-level estimate")) +
#   #geom_rect(aes(xmin = -0.016779 - 1.96*0.003354, xmax = -0.016779 + 1.96*0.003354, ymin = -Inf, ymax = Inf), fill = "blue", alpha = 0.2) +
#   xlab("Species-level time coefficient \n") + ylab("Species Code") +
#   #scale_colour_manual(values = c("Population-level estimate" = "blue")) +
#   scale_colour_manual(values =c("No Change" = "red", "Population-level estimate" = "blue")) +
#   #scale_fill_manual(values = c("blue" = "blue")) +
#   theme(panel.spacing = unit(1, "lines")) +
#   theme(legend.position = "bottom", legend.title = element_blank(), legend.background = element_blank(), legend.box.background = element_rect(colour = "black"))

# habita labels for plots
## main labels

# habitat_labels <- c("Secondary Forest\n(LLAV)", "Primary Forest\n(MASE)", "Introduced Forest\n(MAIN)")
#habitat_labels <- c("Montane shrubland\n(SHRUB)", "Native Forest\n(NATIVE)", "Mixed Native & Introduced Forest\n(MIXED)")
habitat_labels <- c("SHRUB", "NATIVE", "MIXED")

## vector names
# names(habitat_labels) <- c("LLAV","MASE", "MAIN")
names(habitat_labels) <- c("SHRUB","NATIVE", "MIXED")


##
## Species list
data("Ecuador_Species_List")
spp_trt <- Ecuador_Species_List


spp_trt$Species_SciName <- gsub("[\\(\\)]", "", regmatches(spp_trt$Species, gregexpr("\\(.*?\\)", spp_trt$Species)))


spp_trt$Species_SciName <- paste0(substr(spp_trt$Species_SciName,1,1),". ", sub("^\\S+\\s+", '', spp_trt$Species_SciName))


i.col <- which(colnames(spp_trt) %in% c("Specie.Code", "Species_SciName"))

spp_trt[,i.col[1]] <- as.character(spp_trt[,i.col[1]])

slopes.unique <- dplyr::left_join(slopes.unique, spp_trt[,i.col], by = c("spp" = "Specie.Code"))

slopes <- dplyr::left_join(slopes, spp_trt[,i.col], by = c("spp" = "Specie.Code"))

slopes$Species_SciName2 <- ordered(slopes$Species_SciName, levels = slopes.unique[order(slopes.unique$time_cts),]$Species_SciName)

slopes$Sig <- ifelse(slopes$time_ub2 < 0, "Significant", "Not Significant")

Site descriptions from MS "We placed three sampling sites in areas with unique habitat types. These included: (1) native, mature secondary, subtropical moist broadleaf forest (NATIVE) located in Mazán; (2) mixed native and non-native forest (MIXED) also located in Mazán; and (3) native montane shrubland (SHRUB) located in Llaviuco Valley."

Change labels from those used in earlier drafts of MS

slopes$loc.orig <- slopes$loc

slopes$loc <- gsub("LLAV","SHRUB",slopes$loc)
slopes$loc <- gsub("MAIN","MIXED",slopes$loc)
slopes$loc <- gsub("MASE","NATIVE",slopes$loc)

# set order
slopes$loc <- factor(slopes$loc, levels = c("NATIVE","MIXED","SHRUB"))
slopes$loc <- factor(slopes$loc, levels = c("SHRUB","MIXED","NATIVE"))
summary(slopes$loc)

Save slopes

write.csv(slopes, file = "species_specific_slops.csv")
slopes


brouwern/avesdemazan documentation built on July 26, 2022, 8:38 p.m.