In karinorman/biodivTS: What the Package Does (One Line, Title Case)

library(tidyverse)
library(lme4)
library(broom.mixed)
library(lmerTest)
library(emmeans)

Read in data, get final dataset for models

load(here::here("data/model_data.rda"))

model_data <- filter(model_data, taxa != "Amphibians")

Fit GLMMs to overall patterns and data broken down by groups

Function to fit a model

get_model <- function(metric_name, formula, model_id, data, study_id, ...){
  print(c(metric_name, model_id))
  qlmer <- quietly(lmer)

  #check if we need to filter for a single study
  if (missing(study_id)){
    fit_data <- data %>%
      filter(metric == metric_name) %>%
      mutate(year_scaled = scale(year),
             duration_scaled = scale(duration, scale = FALSE),
             startyear_scaled = scale(startyear, scale = FALSE),
             taxa = as.factor(taxa),
             realm = as.factor(realm),
             climate = as.factor(climate))
  } else{
    fit_data <- data %>%
      filter(metric == metric_name) %>%
      mutate(year_scaled = scale(year),
             duration_scaled = scale(duration, scale = FALSE),
             startyear_scaled = scale(startyear, scale = FALSE),
             taxa = as.factor(taxa),
             realm = as.factor(realm),
             climate = as.factor(climate)) %>%
      filter(study_id == study_id)
  }


  #fit the model
  fit <- qlmer(formula = formula, data = fit_data, ...)

  # get tidy version of the model output, capture the warnings
  tidy(fit$result) %>%
    mutate(metric = metric_name, model = list(fit$result), model_id = model_id, 
           n_ts = n_distinct(fit_data$rarefyID),
           nobs = nobs(fit$result),
           formula = formula,
           warning = ifelse(length(fit$warnings) == 0, NA, fit$warnings))

}

#get_model(data = model_data, metric_name = "SES_FRic", "value ~ year_scaled + (year_scaled|study_id/rarefyID)", model_id = "base")

Function to extract estimates for slopes of studies

get_study_slopes <- function(metric, model, model_id){

  filter_name <- ifelse(model_id == "base", "study_id", model_id)

  #get the statistics for the year variable
  year_scaled_coef <- tidy(model) %>%
    filter(term == "year_scaled") 

  #get the year estimate
  year_scaled_est <- pull(year_scaled_coef, estimate) 

  #get year variation
  year_scaled_var <- year_scaled_coef %>% 
    mutate(var = std.error*std.error) %>%
    pull(var)

  #get study level conditional estimates and add to over all estimates
  study_ests <- broom.mixed::tidy(model, effects="ran_vals") %>%
    filter(group == filter_name, term == "year_scaled") %>% 
    select(-c(effect, group, term)) %>%
    rename(cond.std.error = std.error, cond.estimate = estimate) %>%
    mutate(estimate = cond.estimate + year_scaled_est,
      cond.var = cond.std.error*cond.std.error, 
           var = cond.var + year_scaled_var,
           std.error = sqrt(var),
           upr.ci = estimate + (1.96*std.error),
           lwr.ci = estimate - (1.96*std.error),
           sig = case_when(
             lwr.ci < 0 & upr.ci > 0 ~ FALSE,
             TRUE ~ TRUE
           ),
      metric = metric,
      model_id = model_id)
}

Fit models only for BBS (study 195)

metric_names <- unique(model_data$metric)

# Fit all the models only to BBS data (filtering is done in the function)
bbs_models <- map_dfr(metric_names[metric_names != "S"], 
                      ~get_model(data = model_data, 
                                 metric_name = .x, "value ~ year_scaled + (year_scaled|rarefyID)", 
                                 model_id = "base",
                                 study_id = 195))

# Get table of estimates for the scaled year slope
bbs_models %>% 
  filter(term == "year_scaled") %>% 
  select(-model, -model_id, -formula, -n_ts, -warning, -group) %>% 
  relocate(metric) %>% 
  arrange(metric) %>% 
  View()

Fit full models and extract the BBS estimates

# Fit full models with all the studies
full_models <- map_dfr(metric_names[metric_names != "S"], 
                      ~get_model(data = model_data, 
                                 metric_name = .x, "value ~ year_scaled + (year_scaled|study_id/rarefyID)", 
                                 model_id = "base"))

#get the metrics for which the models didn't converge
non_conv <- full_models %>%
  filter(!is.na(warning)) %>% 
  pull(metric) %>% 
  unique()

#Turn off REML for these models so they'll fit
refit_full_models <- map_dfr(non_conv, 
                      ~get_model(data = model_data, 
                                 metric_name = .x, "value ~ year_scaled + (year_scaled|study_id/rarefyID)", 
                                 model_id = "base", REML = FALSE))

# check that they all fit
unique(refit_full_models$warning)

full_models <- bind_rows(full_models %>% filter(is.na(warning)),
                         refit_full_models)

# Get a dataframe with one row for each model
model_df <- full_models %>%
  select(metric, model, model_id, nobs) %>%
  group_by(metric, model_id) %>% 
  slice_head() %>%
  ungroup()

# Get slopes for just BBS for each metric
bbs_slopes <- pmap_dfr(select(model_df, -nobs), get_study_slopes) %>%
  rename(study_id = level) %>%
  filter(study_id == 195) %>% 
  mutate(p.value = 2*(1-pnorm(abs(estimate), 0, std.error)) ) %>% 
  left_join(model_df %>% select(metric, nobs)) %>%
  relocate(metric) %>% 
  arrange(metric)