In karinorman/biodivTS: What the Package Does (One Line, Title Case)

knitr::opts_chunk$set(echo = TRUE)

library(tidyverse)
library(lme4)
library(broom.mixed)
library(lmerTest)
library(emmeans)

Read in data, get final dataset for models

load(here::here("data/model_data.rda"))

model_data <- filter(model_data, taxa != "Amphibians")

Let's look at models for FRic

FRic model

fric_data <- model_data %>% filter(metric == "FRic") %>%
  rename(FRic = value) %>%
   mutate(year_scaled = scale(year),
          taxa = as.factor(taxa),
             realm = as.factor(realm),
             climate = as.factor(climate),
          logFRic = log(FRic))

let's do the full model for all studies

fric_full_fit <- lmer("FRic ~ year_scaled + (year_scaled|study_id/rarefyID)", data = fric_data, REML = FALSE)

logfric_full_fit <- lmer("logFRic ~ year_scaled + (year_scaled|study_id/rarefyID)", data = fric_data, control = lmerControl(optimizer = "bobyqa"))

# remove the outlier
logfric_full_fit_ex <- lmer("logFRic ~ year_scaled + (year_scaled|study_id/rarefyID)", data = fric_data %>% filter(study_id != 515), control = lmerControl(optimizer = "bobyqa"))

look at normality

# hist(ranef(fric_full_fit)$study_id$`(Intercept)`)
# shapiro.test(ranef(fric_full_fit)$study_id$`(Intercept)`) # test of normality
hist(ranef(fric_full_fit)$study_id$`year_scaled`)
shapiro.test(ranef(fric_full_fit)$study_id$`year_scaled`)

hist(ranef(logfric_full_fit)$study_id$`(Intercept)`)
shapiro.test(ranef(logfric_full_fit)$study_id$`(Intercept)`)
hist(ranef(logfric_full_fit)$study_id$`year_scaled`)
shapiro.test(ranef(logfric_full_fit)$study_id$`year_scaled`)

test_normality <- function(model_fit, metric_name){
  intercepts <- ranef(model_fit)$study_id$`(Intercept)`
  slopes <- ranef(model_fit)$study_id$`year_scaled`

  hist(intercepts, main = paste("intercepts for", metric_name))
  hist(slopes, main = paste("slopes for", metric_name))

  bind_rows(tidy(shapiro.test(intercepts)) %>% mutate(type = "intercept"),
            tidy(shapiro.test(slopes)) %>% mutate(type = "slope")) %>%
    mutate(metric = metric_name)
}

test_normality(logfric_full_fit, "log(FRic)")

Look at normality for base models we've already fit

# model_table comes from 05_model_metrics.Rmd

model_df <- model_table %>%
  filter(model_id == "base") %>%
  select(metric, model, model_id) %>%
  group_by(metric, model_id) %>% 
  slice_head()

mod_normality <- map2_dfr(model_df$model, model_df$metric, test_normality)

Let's make almost everything log, not SES metrics because they're already normalized

metric_names <- unique(model_data$metric)

log_mods <- tibble(metric_name = metric_names[!metric_names %in% c("SES_FRic", "SES_FDiv", "SES_FEve")], 
                         formula = "logvalue ~ year_scaled + (year_scaled|study_id/rarefyID)",
                   model_id = "base") 

log_mods_fit <- pmap_dfr(log_mods, get_model, data = model_data)

log_model_df <- log_mods_fit %>%
  filter(model_id == "base") %>%
  select(metric, model, model_id) %>%
  group_by(metric, model_id) %>% 
  slice_head()

logmod_normality <- map2_dfr(log_model_df$model, log_model_df$metric, test_normality)

The logvalue in model_data is actually log(value + 1), which works for the CWM but not the summary metrics

log_metrics <- c("FRic", "S", "FDiv", "FEve")
log_mods <- tibble(metric_name = log_metrics, 
                         formula = "log(value) ~ year_scaled + (year_scaled|study_id/rarefyID)",
                   model_id = "base") 

logvalue_mods <- tibble(metric_name = metric_names[!metric_names %in% append(c("SES_FRic", "SES_FDiv", "SES_FEve"), log_metrics)], 
                         formula = "logvalue ~ year_scaled + (year_scaled|study_id/rarefyID)",
                   model_id = "base") 

trans_mods <- bind_rows(log_mods, logvalue_mods)

trans_mods_fit <- pmap_dfr(trans_mods, get_model, data = model_data) %>%
  mutate(REML = TRUE, bobyqa = FALSE)

# have to make sure they all converge #

#get the models that had convergence warnings
bad_models <- trans_mods_fit %>%
  select(metric, model_id, warning) %>% 
  filter(!is.na(warning)) %>% 
  distinct() %>%
  left_join(trans_mods, by = c("metric" = "metric_name", "model_id")) 

#Turn off REML for these models, and they fit! 
bad_mod_results <- pmap_dfr(bad_models %>% select(-warning), get_model, data = model_data, REML = FALSE) %>%
  mutate(REML = FALSE, bobyqa = FALSE)

#models that still won't converge
bad_models2 <- bad_mod_results %>%
  select(metric, model_id, warning) %>% 
  filter(!is.na(warning)) %>% 
  distinct() %>%
  left_join(trans_mods, by = c("metric" = "metric_name", "model_id")) 

#Fit with a different algorithm
bad_mod_results2 <- pmap_dfr(bad_models2 %>% select(-warning), get_model, data = model_data, control = lmerControl(optimizer = "bobyqa")) %>%
  mutate(REML = TRUE, bobyqa = TRUE)

transmodel_table <- bind_rows(trans_mods_fit %>% filter(is.na(warning)), 
          bad_mod_results %>% filter(is.na(warning)),
          bad_mod_results2 %>% filter(is.na(warning)))


# let's look at normality of the random effects
trans_model_df <- transmodel_table %>%
  select(metric, model, model_id, formula, REML, bobyqa) %>%
  group_by(metric, model_id) %>% 
  slice_head() %>%
  ungroup()

transmod_normality <- map2_dfr(trans_model_df$model, trans_model_df$metric, test_normality)

Let's figure out if we have outlier issues, after visually inspecting the random effects

s_mod <- trans_model_df %>% filter(metric == "S") %>% pull(model) %>% .[[1]]

intercepts <- ranef(s_mod)$study_id$`(Intercept)`
slopes <- ranef(s_mod)$study_id$`year_scaled`

which(slopes < -1) # It's row 22
ranef(s_mod)$study_id[22,] # It's study 377

s_fit <- lmer("log(value) ~ year_scaled + (year_scaled|study_id/rarefyID)", data = model_data %>% filter(metric == "S", study_id != 377), control = lmerControl(optimizer = "bobyqa"))

Let's make it a function

check_outliers <- function(start_mod, metric_name, outlier_criterion, lmer_formula, REML = TRUE, bobyqa = FALSE){

  intercepts <- ranef(start_mod)$study_id$`(Intercept)`
  slopes <- ranef(start_mod)$study_id$`year_scaled`

  rows <- which(eval(rlang::parse_expr(outlier_criterion))) # It's row 22
  print(rows)
  if(length(rows) > 1) warning(paste("There is more than one study outside the outlier cutoff for", metric_name))

  study <- as.integer(rownames(ranef(start_mod)$study_id[rows,]))

  if (isTRUE(REML)){
    if (isTRUE(bobyqa)) {
        fit <- lmer(lmer_formula, 
              data = model_data %>% filter(metric == metric_name, study_id != study), 
              control = lmerControl(optimizer = "bobyqa"))
    } else {
      fit <- lmer(lmer_formula, 
              data = model_data %>% filter(metric == metric_name, study_id != study))
    }
  } else{
    if (isTRUE(bobyqa)) {
        fit <- lmer(lmer_formula, 
              data = model_data %>% filter(metric == metric_name, study_id != study), 
              REML = FALSE,
              control = lmerControl(optimizer = "bobyqa"))
    } else {
      fit <- lmer(lmer_formula, 
              data = model_data %>% filter(metric == metric_name, study_id != study),
              REML = FALSE)
    }
  }


  tibble(metric = metric_name,
         old_estimate = tidy(start_mod) %>% filter(term == "year_scaled") %>% pull(estimate), 
         old_p = tidy(start_mod) %>% filter(term == "year_scaled") %>% pull(p.value), 
         new_estimate = tidy(fit) %>% filter(term == "year_scaled") %>% pull(estimate), 
         new_p = tidy(fit) %>% filter(term == "year_scaled") %>% pull(p.value))

}

#check_outliers("S", "slopes < -1", "log(value) ~ year_scaled + (year_scaled|study_id/rarefyID)", REML = TRUE, bobyqa = TRUE)

# create dataframe for outlier criterion based on visualize of random effects
criterion <- tibble(metric_name = c("CWM_bodymass_value", "CWM_diet_inv", "CWM_diet_scav", "CWM_diet_vect", 
                                     "CWM_diet_vend", "CWM_diet_vunk", "CWM_forstrat_aerial", "CWM_forstrat_midhigh",
                                     "CWM_forstrat_understory", "Jaccard_base", "S"),
                    outlier_criterion = c("slopes > 0.4", "slopes < -0.5", "slopes > 0.6", "slopes > 0.4", 
                                          "slopes > 0.5", "slopes < -0.5", "slopes > 0.04", "slopes < -0.05", 
                                          "slopes < -0.4", "slopes > 0.02", "slopes < -1")
                    )

# get metric info for how models were fit
outlier_mods <- trans_model_df %>% 
  select(start_mod = model, metric_name = metric, lmer_formula = formula, REML, bobyqa) %>%
  right_join(criterion) %>%
  select(start_mod, metric_name, outlier_criterion, lmer_formula, REML, bobyqa)

outlier_comp <- pmap_dfr(outlier_mods, check_outliers)

karinorman/biodivTS documentation built on Dec. 23, 2024, 10 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

karinorman/biodivTS
What the Package Does (One Line, Title Case)

In karinorman/biodivTS: What the Package Does (One Line, Title Case)

Read in data, get final dataset for models

FRic model

Look at normality for base models we've already fit

Let's make almost everything log, not SES metrics because they're already normalized

The logvalue in model_data is actually log(value + 1), which works for the CWM but not the summary metrics

Let's figure out if we have outlier issues, after visually inspecting the random effects

R Package Documentation

Browse R Packages

We want your feedback!

karinorman/biodivTS What the Package Does (One Line, Title Case)

In karinorman/biodivTS: What the Package Does (One Line, Title Case)

Read in data, get final dataset for models

FRic model

Look at normality for base models we've already fit

Let's make almost everything log, not SES metrics because they're already normalized

The logvalue in model_data is actually log(value + 1), which works for the CWM but not the summary metrics

Let's figure out if we have outlier issues, after visually inspecting the random effects

R Package Documentation

Browse R Packages

We want your feedback!

karinorman/biodivTS
What the Package Does (One Line, Title Case)