knitr::opts_chunk$set(echo = TRUE)
library(tidyverse) library(lme4) library(broom.mixed) library(lmerTest) library(emmeans) library(purrr)
Get data and put it in long form
#get data load(here::here("data/meta.rda")) load(here::here("data/bt_traitfiltered.rda")) load(here::here("data/rarefied_metrics.rda")) load(here::here("data/rarefyID_cell_centre.rda")) source(here::here("R/coords2continent.R")) meta_clean <- meta %>% select(study_id, realm, climate, general_treat, treatment, treat_comments, treat_date, habitat, protected_area, biome_map) meta_clean <- bt_traitfiltered %>% select(study_id, rarefyid, taxa) %>% distinct() %>% left_join(meta_clean) %>% mutate(rarefyid_join = stringr::str_extract(rarefyid, "[^_]*_[^_]*")) %>% left_join(rarefyID_cell_centre %>% select(-STUDY_ID), by = c("rarefyid_join" = "rarefyID")) %>% mutate(country = coords2continent(tibble(rarefyID_x = .$rarefyID_x, rarefyID_y = .$rarefyID_y))) %>% select(-c(rarefyid_join, cell_extent, rarefyID_x, rarefyID_y)) %>% distinct() rm(bt_traitfiltered) #get metric data metrics <- rarefied_metrics %>% rename_with(tolower, .cols = setdiff(everything(), one_of("rarefyID"))) %>% mutate(logvalue = log(value + 1)) %>% # create a column for log transformation that adds the absolute value integer closest to the most negative value to create all positive values left_join(meta_clean, c("rarefyID" = "rarefyid")) ## Now the data that actually makes it into the model ## #get rarefyID's that don't have enough null samples missing_null <- bind_rows(metrics %>% filter(n_missing_nulls == 1, commplete_null_samps == FALSE), metrics %>% filter(n_missing_nulls > 100)) %>% pull(rarefyID) %>% unique() model_data <- metrics %>% filter(metric %in% c("SES_FRic", "SES_FEve", "SES_FDiv", "Jaccard_base", "Jaccard_hind", "FRic", "FEve", "FDiv", "S","CWM_bodymass_value","CWM_diet_inv", "CWM_diet_scav", "CWM_diet_vfish", "CWM_diet_vunk", "CWM_forstrat_ground", "CWM_forstrat_wataroundsurf", "CWM_forstrat_watbelowsurf", "CWM_diet_fruit", "CWM_diet_planto", "CWM_diet_seed", "CWM_diet_vend", "CWM_forstrat_understory", "CWM_diet_vect", "CWM_diet_nect", "CWM_forstrat_aerial", "CWM_forstrat_canopy", "CWM_forstrat_midhigh", "CWM_age_at_maturity_max_y", "CWM_age_at_maturity_min_y", "CWM_body_size_mm", "CWM_litter_size_max_n", "CWM_litter_size_min_n", "CWM_longevity_max_y", "CWM_offspring_size_max_mm", "CWM_offspring_size_min_mm"), !rarefyID %in% missing_null) %>% group_by(metric) %>% mutate(study_id = str_extract(rarefyID, "[^_]+")) %>% mutate(year_scaled = scale(year), scale_center = attributes(year_scaled)$`scaled:center`, scale = attributes(year_scaled)$`scaled:scale`) %>% ungroup() %>% #this study has many low species richness observations, so FD metrics weren't calculated, just drop it filter(study_id != 348) %>% #somehow the taxa labels didn't get updated when the timeseries for multi-taxa studies were split up, do that now mutate(new_taxa = str_split_fixed(rarefyID, "_", n = 3)[,3], taxa = case_when( new_taxa == "bird" ~ "Birds", new_taxa == "mamm" ~ "Mammals", TRUE ~ as.character(taxa) )) %>% select(-new_taxa) usethis::use_data(model_data) usethis::use_data(meta_clean)
Write out file with all necessary metadata for the supplement.
meta %>% filter(study_id %in% model_data$study_id) %>% select(-c(link_id, date_study_added, sample_desc_name)) %>% write_csv(here::here("paper", "study_metadata.csv"))
Convergence problems! See here: https://joshua-nugent.github.io/allFit/ https://rstudio-pubs-static.s3.amazonaws.com/33653_57fc7b8e5d484c909b615d8633c01d51.html https://biologyforfun.wordpress.com/2018/04/09/help-i-have-convergence-warnings/
Model checks: https://debruine.github.io/posts/normality/ performance::check_model()
Function approach to model
get_model <- function(metric_name, formula, model_id, data, filter_study_id, bobyqa = FALSE, ...){ print(c(metric_name, model_id)) qlmer <- quietly(lmer) #print(filter_study_id) if (missing(filter_study_id)){ fit_data <- data %>% filter(metric == metric_name) %>% mutate(year_scaled = scale(year), duration_scaled = scale(duration, scale = FALSE), startyear_scaled = scale(startyear, scale = FALSE), taxa = as.factor(taxa), realm = as.factor(realm), climate = as.factor(climate)) } else{ print(paste("Filter study", filter_study_id)) fit_data <- data %>% filter(metric == metric_name) %>% mutate(year_scaled = scale(year), duration_scaled = scale(duration, scale = FALSE), startyear_scaled = scale(startyear, scale = FALSE), taxa = as.factor(taxa), realm = as.factor(realm), climate = as.factor(climate)) %>% filter(study_id == filter_study_id) print(n_distinct(fit_data$study_id)) } print(dim(fit_data)) # check if there's only one time series if(n_distinct(fit_data$rarefyID) < 2){ return(tibble()) } # # check if there are enough time points to fit # year_count <- fit_data %>% count(rarefyID) # if(any(year_count$n == 1)){ # return(tibble()) # } if(!model_id %in% c("base", "base_dur", "base_srt")){ if(fit_data %>% pull(model_id) %>% n_distinct() >1) { if (isFALSE(bobyqa)){ fit <- qlmer(formula = formula, data = fit_data, ...) } else{ fit <- qlmer(formula = formula, data = fit_data, control = lmerControl(optimizer = "bobyqa"), ...) plain_fit <- lmer(formula = formula, data = fit_data, control = lmerControl(optimizer = "bobyqa"), ...) } slopes <- get_fixed_slopes(metric = metric_name, model = fit$result, model_id = model_id, model_data = fit_data) group_slopes <- as_tibble(slopes$emtrends) %>% mutate(effect = "group_slope", group = model_id) %>% rename(std.error = SE, estimate = year_scaled.trend, term = model_id) %>% mutate(term = as.character(term)) slope_constrasts <- as_tibble(slopes$contrasts) %>% mutate(effect = "contrast", group = model_id) %>% rename(std.error = SE, term = contrast, statistic = t.ratio) df <- tidy(fit$result) %>% bind_rows(group_slopes, slope_constrasts) %>% mutate(metric = metric_name, model = ifelse(exists("plain_fit"), list(plain_fit), list(fit$result)), model_id = model_id, n_ts = n_distinct(fit_data$rarefyID), formula = formula, warning = ifelse(length(fit$warnings) == 0, NA, fit$warnings)) }else{ return(tibble()) } }else{ if (isFALSE(bobyqa)){ fit <- qlmer(formula = formula, data = fit_data, ...) } else { fit <- qlmer(formula = formula, data = fit_data, control = lmerControl(optimizer = "bobyqa"), ...) plain_fit <- lmer(formula = formula, data = fit_data, control = lmerControl(optimizer = "bobyqa"), ...) } if (missing(filter_study_id)){ df_out <- tidy(fit$result) %>% mutate(metric = metric_name, model = ifelse(exists("plain_fit"), list(plain_fit), list(fit$result)), model_id = model_id, n_ts = n_distinct(fit_data$rarefyID), formula = formula, warning = ifelse(length(fit$warnings) == 0, NA, fit$warnings)) }else df_out <- tidy(fit$result) %>% mutate(metric = metric_name, model = ifelse(exists("plain_fit"), list(plain_fit), list(fit$result)), model_id = model_id, n_ts = n_distinct(fit_data$rarefyID), formula = formula, warning = ifelse(length(fit$warnings) == 0, NA, fit$warnings), study_id = filter_study_id) } } get_fixed_slopes <- function(metric, model, model_id, model_data){ formula <- paste("pairwise", "~", model_id) fit_trends <- emtrends(model, specs = as.formula(formula), var="year_scaled", mode = "satterth", lmerTest.limit = 20105, data = model_data) return(fit_trends) } #get_model(data = model_data, metric_name = "SES_FRic", "value ~ year_scaled + (year_scaled|study_id/rarefyID) + (year_scaled|climate)", model_id = "base")
Function to fit model for studies with only one time series (so a glm)
get_glm_model <- function(metric_name, formula, model_id, data, filter_study_id, ...){ print(c(metric_name, model_id)) qglm <- quietly(glm) print(filter_study_id) fit_data <- data %>% filter(metric == metric_name) %>% mutate(year_scaled = scale(year), duration_scaled = scale(duration, scale = FALSE), startyear_scaled = scale(startyear, scale = FALSE), taxa = as.factor(taxa), realm = as.factor(realm), climate = as.factor(climate)) %>% filter(study_id == filter_study_id) print(n_distinct(fit_data$study_id)) fit <- qglm(formula = formula, data = fit_data, ...) df_out <- tidy(fit$result) %>% mutate(metric = metric_name, model = list(fit$result), model_id = model_id, n_ts = n_distinct(fit_data$rarefyID), formula = formula, warning = ifelse(length(fit$warnings) == 0, NA, fit$warnings), study_id = filter_study_id) }
Create dataframe of mixed effects models to fit
#exclude amphibians model_data <- model_data %>% filter(taxa != "Amphibians") metric_names <- unique(model_data$metric) raw_metrics <- c("SES_FRic", "SES_FDiv", "SES_FEve", "CWM_diet_inv", "CWM_forstrat_ground", "CWM_forstrat_watbelowsurf", "FEve") log_metrics <- c("FRic", "S", "FDiv") logone_metrics <- metric_names[!metric_names %in% c(raw_metrics, log_metrics)] raw_value_mods <- tibble(metric_name = raw_metrics, base = "value ~ year_scaled + (year_scaled|study_id/rarefyID)", taxa = "value ~ year_scaled * taxa + (year_scaled|study_id/rarefyID)", realm = "value ~ year_scaled * realm + (year_scaled|study_id/rarefyID)", climate = "value ~ year_scaled * climate + (year_scaled|study_id/rarefyID)", protected_area = "value ~ year_scaled * protected_area + (year_scaled|study_id/rarefyID)") %>% pivot_longer(cols = c(base, taxa, realm, climate, protected_area), names_to = "model_id", values_to = "formula") %>% #don't fit the CWM_litter_size_min_n and climate model, not enough data filter(metric_name != "CWM_litter_size_min_n" | model_id != "climate") log_mods <- tibble(metric_name = log_metrics, base = "log(value) ~ year_scaled + (year_scaled|study_id/rarefyID)", taxa = "log(value) ~ year_scaled * taxa + (year_scaled|study_id/rarefyID)", realm = "log(value) ~ year_scaled * realm + (year_scaled|study_id/rarefyID)", climate = "log(value) ~ year_scaled * climate + (year_scaled|study_id/rarefyID)", protected_area = "log(value) ~ year_scaled * protected_area + (year_scaled|study_id/rarefyID)") %>% pivot_longer(cols = c(base, taxa, realm, climate, protected_area), names_to = "model_id", values_to = "formula") logone_mods <- tibble(metric_name = logone_metrics, base = "logvalue ~ year_scaled + (year_scaled|study_id/rarefyID)", taxa = "logvalue ~ year_scaled * taxa + (year_scaled|study_id/rarefyID)", realm = "logvalue ~ year_scaled * realm + (year_scaled|study_id/rarefyID)", climate = "logvalue ~ year_scaled * climate + (year_scaled|study_id/rarefyID)", protected_area = "logvalue ~ year_scaled * protected_area + (year_scaled|study_id/rarefyID)") %>% pivot_longer(cols = c(base, taxa, realm, climate, protected_area), names_to = "model_id", values_to = "formula") mods <- bind_rows(raw_value_mods, log_mods, logone_mods) %>% select(metric_name, formula, model_id)
Fit models
#Fit models with base settings mod_results <- pmap_dfr(mods, get_model, data = model_data) %>% mutate(REML = TRUE, bobyqa = FALSE) #get the models that had convergence warnings bad_models <- mod_results %>% select(metric, model_id, warning) %>% filter(!is.na(warning)) %>% distinct() %>% left_join(mods, by = c("metric" = "metric_name", "model_id")) #Turn off REML for these models, and they fit! bad_mod_results <- pmap_dfr(bad_models %>% select(-warning), get_model, data = model_data, REML = FALSE) %>% mutate(REML = FALSE, bobyqa = FALSE) #models that still won't converge bad_models2 <- bad_mod_results %>% select(metric, model_id, warning) %>% filter(!is.na(warning)) %>% distinct() %>% left_join(mods, by = c("metric" = "metric_name", "model_id")) #Fit with a different algorithm bad_mod_results2 <- pmap_dfr(bad_models2 %>% select(-warning), get_model, data = model_data, bobyqa = TRUE) %>% mutate(REML = TRUE, bobyqa = TRUE) model_table <- bind_rows(mod_results %>% filter(is.na(warning)), bad_mod_results %>% filter(is.na(warning)), bad_mod_results2 %>% filter(is.na(warning))) %>% mutate(significant = case_when( p.value > 0.05 | lower.CL < 0 & upper.CL > 0 ~ FALSE, !is.na(p.value) | !is.na(lower.CL) ~ TRUE, TRUE ~ NA )) metric_model_table <- model_table %>% select(-model) usethis::use_data(metric_model_table)
Get list of models that couldn't be fit due to model limitations
#models that were fit uniq_metric_model <- metric_model_table %>% select(metric, model_id) %>% distinct() #models we attempted to fit uniq_mods <- mods %>% select(metric = metric_name, model_id) %>% distinct() #this is a list of models that weren't fit because there weren't enough timeseries OR they didn't converge #most traits only found for one taxa with taxa as a covariate, the nectar diet realm model was data limited missing_mods <- setdiff(uniq_mods, uniq_metric_model)
Let's identify any major outliers in slope random effects for the base models to see if they're impacting the overall trend.
First two functions to visualize and exclude outliers
test_normality <- function(model_fit, metric_name){ intercepts <- ranef(model_fit)$study_id$`(Intercept)` slopes <- ranef(model_fit)$study_id$`year_scaled` hist(intercepts, main = paste("intercepts for", metric_name)) hist(slopes, main = paste("slopes for", metric_name)) bind_rows(tidy(shapiro.test(intercepts)) %>% mutate(type = "intercept"), tidy(shapiro.test(slopes)) %>% mutate(type = "slope")) %>% mutate(metric = metric_name) } check_outliers <- function(start_mod, metric_name, outlier_criterion, lmer_formula, REML = TRUE, bobyqa = FALSE){ print(metric_name) intercepts <- ranef(start_mod)$study_id$`(Intercept)` slopes <- ranef(start_mod)$study_id$`year_scaled` rows <- which(eval(rlang::parse_expr(outlier_criterion))) # It's row 22 print(rows) if(length(rows) > 1) warning(paste("There is more than one study outside the outlier cutoff for", metric_name)) study <- as.integer(rownames(ranef(start_mod)$study_id[rows,])) if (isTRUE(REML)){ if (isTRUE(bobyqa)) { fit <- lmer(lmer_formula, data = model_data %>% filter(metric == metric_name, study_id != study), control = lmerControl(optimizer = "bobyqa")) } else { fit <- lmer(lmer_formula, data = model_data %>% filter(metric == metric_name, study_id != study)) } } else{ if (isTRUE(bobyqa)) { fit <- lmer(lmer_formula, data = model_data %>% filter(metric == metric_name, study_id != study), REML = FALSE, control = lmerControl(optimizer = "bobyqa")) } else { fit <- lmer(lmer_formula, data = model_data %>% filter(metric == metric_name, study_id != study), REML = FALSE) } } tibble(metric = metric_name, old_estimate = tidy(start_mod) %>% filter(term == "year_scaled") %>% pull(estimate), old_p = tidy(start_mod) %>% filter(term == "year_scaled") %>% pull(p.value), new_estimate = tidy(fit) %>% filter(term == "year_scaled") %>% pull(estimate), new_p = tidy(fit) %>% filter(term == "year_scaled") %>% pull(p.value)) }
Visualize the random effects for the base models
base_mods <- model_table %>% filter(model_id == "base") %>% select(metric, model, formula, REML, bobyqa) %>% group_by(metric) %>% slice_head() %>% ungroup() mod_normality <- map2_dfr(base_mods$model, base_mods$metric, test_normality)
Based on the visualization, we identify some metrics that are suspect for outliers
# create dataframe for outlier criterion based on visualize of random effects criterion <- tibble(metric_name = c("CWM_bodymass_value", "CWM_diet_inv", "CWM_diet_scav", "CWM_diet_vect", "CWM_diet_vend", "CWM_diet_vunk", "CWM_forstrat_aerial", "CWM_forstrat_midhigh", "CWM_forstrat_understory", "Jaccard_base", "S"), outlier_criterion = c("slopes > 0.4", "slopes < -20", "slopes > 0.6", "slopes > 0.4", "slopes > 0.5", "slopes < -0.5", "slopes > 0.04", "slopes < -0.05", "slopes < -0.4", "slopes > 0.02", "slopes < -1") ) unique_mod_df <- model_table %>% filter(model_id == "base") %>% select(metric, model, model_id, formula, REML, bobyqa) %>% group_by(metric) %>% slice_head() %>% ungroup() # get info for how original models were fit outlier_mods <- unique_mod_df %>% select(start_mod = model, metric_name = metric, lmer_formula = formula, REML, bobyqa) %>% right_join(criterion) %>% select(start_mod, metric_name, outlier_criterion, lmer_formula, REML, bobyqa) # refit without outliers and compare outlier_comp <- pmap_dfr(outlier_mods, check_outliers)
#get list of studies that only have one time series single_ts <- model_data %>% select(study_id, rarefyID, metric) %>% distinct() %>% count(study_id, metric) %>% filter(n == 1) %>% select(-n) %>% rename(metric_name = metric, filter_study_id = study_id) # get dataframe of mixed effects models to fit metric_df <- model_data %>% select(filter_study_id = study_id, metric_name = metric) %>% distinct() %>% mutate(model_id = "base", formula = case_when( metric_name %in% raw_metrics ~ "value ~ year_scaled + (year_scaled|rarefyID)", metric_name %in% log_metrics ~ "log(value) ~ year_scaled + (year_scaled|rarefyID)", TRUE ~ "logvalue ~ year_scaled + (year_scaled|rarefyID)" ) ) # exclude studies/metrics with only one rarefyID mixed_indv_model_df <- metric_df %>% anti_join(single_ts, by = c("metric_name", "filter_study_id")) # fit models for individual studies mixed_indv_mod_results <- pmap_dfr(mixed_indv_model_df %>% filter(!metric_name %in% c("Jaccard_base", "Jaccard_hind")), get_model, data = model_data) # they didn't all converge #get the models that had convergence warnings bad_indv_models <- mixed_indv_mod_results %>% select(metric, model_id, study_id, warning) %>% filter(!is.na(warning)) %>% distinct() %>% left_join(mixed_indv_model_df, by = c("metric" = "metric_name", "model_id", "study_id" = "filter_study_id")) %>% rename( filter_study_id = study_id) #Turn off REML for these models, and they fit! bad_indv_results <- pmap_dfr(bad_indv_models %>% select(-warning), get_model, data = model_data, REML = FALSE) %>% mutate(REML = FALSE, bobyqa = FALSE) #models that still won't converge bad_indv_models2 <- bad_indv_results %>% select(metric, model_id, warning, study_id) %>% filter(!is.na(warning)) %>% distinct() %>% left_join(mixed_indv_model_df, by = c("metric" = "metric_name", "model_id", "study_id" = "filter_study_id")) %>% rename( filter_study_id = study_id) #Fit with a different algorithm bad_indv_results2 <- pmap_dfr(bad_indv_models2 %>% select(-warning), get_model, data = model_data, control = lmerControl(optimizer = "bobyqa")) %>% mutate(REML = TRUE, bobyqa = TRUE)
There are 13 study/metric combos that can't be fit due to data limitations (too many rarefyID's )
tofit_mods <- bad_indv_results2 %>% filter(!is.na(warning)) %>% select(metric, study_id, formula) %>% distinct() missing_indv_studies <- pmap_df(tofit_mods %>% select(metric_name = metric, filter_study_id = study_id), function(metric_name, filter_study_id) {model_data %>% filter(metric_name == metric, study_id == filter_study_id) %>% mutate(n = n(), nrarefyID = n_distinct(rarefyID)) %>% select(metric, study_id, nrarefyID, n) %>% distinct()}) readr::write_csv(missing_indv_studies, here::here("paper/missing_indv_studies.csv"))
Fit Jaccard Mixed models to see which will fit
mixed_jacc_mod_results <- pmap_dfr(mixed_indv_model_df %>% filter(metric_name == "Jaccard_base"), possibly(get_model, otherwise = tibble()), data = model_data) non_conv_study <- mixed_jacc_mod_results %>% filter(!is.na(warning)) %>% pull(study_id) %>% unique() mixed_jacc_mod_results2 <- pmap_dfr(mixed_indv_model_df %>% filter(metric_name == "Jaccard_base", filter_study_id %in% non_conv_study), possibly(get_model, otherwise = tibble()), data = model_data, bobyqa = TRUE) mixed_jacc_mod_results_fin <- bind_rows(mixed_jacc_mod_results %>% filter(!study_id %in% non_conv_study), mixed_jacc_mod_results2)
Fit models for studies with only one time series
#get dataframe of glm's to fit glm_indv_model_df <- single_ts %>% mutate(model_id = "base", formula = case_when( metric_name %in% raw_metrics ~ "value ~ year_scaled", metric_name %in% log_metrics ~ "log(value) ~ year_scaled", TRUE ~ "logvalue ~ year_scaled" ) ) #fit them glm_indv_mod_results <- pmap_dfr(glm_indv_model_df, get_glm_model, data = model_data) indv_mod_results <- bind_rows(mixed_indv_mod_results %>% filter(is.na(warning)), bad_indv_results %>% filter(is.na(warning)), bad_indv_results2 %>% filter(is.na(warning)), glm_indv_mod_results, mixed_jacc_mod_results_fin ) indv_mod_table <- indv_mod_results %>% select(-model) usethis::use_data(indv_mod_table)
dur_dfs <- list(min3 = filter(model_data, duration > 2), min4 = filter(model_data, duration > 3), min5 = filter(model_data, duration > 4), min10 = filter(model_data, duration > 9)) duration_sens <- map_dfr(dur_dfs, ~pmap_dfr(filter(mods, model_id == "base"), get_model, data = .x), .id = "dataID") # first group of bad models, try with REML off bad_dur_models <- duration_sens %>% select(metric, model_id, warning, dataID) %>% filter(!is.na(warning)) %>% distinct() min3_bad <- bad_dur_models %>% filter(dataID == "min3") %>% pull(metric) min4_bad <- bad_dur_models %>% filter(dataID == "min4") %>% pull(metric) min5_bad <- bad_dur_models %>% filter(dataID == "min5") %>% pull(metric) min10_bad <- bad_dur_models %>% filter(dataID == "min10") %>% pull(metric) refit_dur <- bind_rows(pmap_dfr(filter(mods, model_id == "base", metric_name %in% min3_bad), get_model, data = dur_dfs$min3, REML = FALSE) %>% mutate(dataID = "min3"), pmap_dfr(filter(mods, model_id == "base", metric_name %in% min4_bad), get_model, data = dur_dfs$min4, REML = FALSE) %>% mutate(dataID = "min4"), pmap_dfr(filter(mods, model_id == "base", metric_name %in% min5_bad), get_model, data = dur_dfs$min5, REML = FALSE) %>% mutate(dataID = "min5"), pmap_dfr(filter(mods, model_id == "base", metric_name %in% min10_bad), get_model, data = dur_dfs$min10, REML = FALSE) %>% mutate(dataID = "min10") ) # two left that didn't converge bad_dur_models2 <- refit_dur %>% select(metric, model_id, warning, dataID) %>% filter(!is.na(warning)) %>% distinct() refit_dur_2 <- bind_rows(pmap_dfr(filter(mods, model_id == "base", metric_name == "CWM_diet_planto"), get_model, data = dur_dfs$min3, bobyqa = TRUE) %>% mutate(dataID = "min3"), pmap_dfr(filter(mods, model_id == "base", metric_name == "CWM_forstrat_midhigh"), get_model, data = dur_dfs$min5, bobyqa = TRUE) %>% mutate(dataID = "min5")) duration_sens_mods <- bind_rows(duration_sens %>% filter(is.na(warning)), refit_dur %>% filter(is.na(warning)), refit_dur_2 %>% filter(is.na(warning)))
load(here::here("data/study_table.rda")) model_data <- model_data %>% left_join(study_table %>% select(study_id, coverage) %>% mutate(study_id = as.factor(study_id))) covg_dfs <- list(covg75 = filter(model_data, taxa != "Amphibians"), covg85 = filter(model_data, taxa != "Amphibians", coverage >= 0.85), covg90 = filter(model_data, taxa != "Amphibians", coverage >= 0.90)) coverage_sens <- map_dfr(covg_dfs, ~pmap_dfr(filter(mods, model_id == "base"), get_model, data = .x), .id = "dataID") # first group of bad models, try with REML off bad_cov_models <- coverage_sens %>% select(metric, model_id, warning, dataID) %>% filter(!is.na(warning)) %>% distinct() covg75_bad <- bad_cov_models %>% filter(dataID == "covg75") %>% pull(metric) covg85_bad <- bad_cov_models %>% filter(dataID == "covg85") %>% pull(metric) covg90_bad <- bad_cov_models %>% filter(dataID == "covg90") %>% pull(metric) refit_cov <- bind_rows(pmap_dfr(filter(mods, model_id == "base", metric_name %in% covg75_bad), get_model, data = covg_dfs$covg75, REML = FALSE) %>% mutate(dataID = "covg75"), pmap_dfr(filter(mods, model_id == "base", metric_name %in% covg85_bad), get_model, data = covg_dfs$covg85, REML = FALSE) %>% mutate(dataID = "covg85"), pmap_dfr(filter(mods, model_id == "base", metric_name %in% covg90_bad), get_model, data = covg_dfs$covg90, REML = FALSE) %>% mutate(dataID = "covg90") ) coverage_sens_mods <- bind_rows(coverage_sens %>% filter(is.na(warning)), refit_cov %>% filter(is.na(warning))) sens_mods <- bind_rows(duration_sens_mods, coverage_sens_mods) %>% select(-model, -warning) %>% filter(metric != "Jaccard_next")
# Let's get a dataframe of p-values for the models we actually used # base models for all metrics base <- metric_model_table %>% filter(model_id == "base", term == "year_scaled", metric != "Jaccard_hind") # within group and between group trends for all metrics contrast <- metric_model_table %>% filter(model_id != "base", effect == "contrast") group_slope <- metric_model_table %>% filter(model_id != "base", effect == "group_slope") %>% mutate(tval = estimate/std.error, p.value = 2*pt(-abs(tval), df)) # study-level trends for species and functional diversity metrics study_trend <- indv_mod_table %>% filter(term == "year_scaled") # sensitivity models sens_trends <- sens_mods %>% filter(term == "year_scaled") pval_df <- bind_rows(base, contrast, group_slope, study_trend, sens_trends) %>% filter(metric != "Jaccard_next") adj.p <- p.adjust(pval_df$p.value, method = "BH") pval_df <- bind_cols(pval_df, p.adjust = adj.p) # # Which of the general model trends are likely spurious # pval_df %>% # filter(p.value < 0.05, p.adjust > 0.05) %>% # filter(is.na(study_id)) %>% # View() # # # Which of the study-level trends are likely spurious # pval_df %>% # filter(p.value < 0.05, p.adjust > 0.05) %>% # filter(!is.na(study_id)) %>% # View() # add adjusted p value to model table model_output <- bind_rows(metric_model_table, indv_mod_table) %>% select(-p.value) %>% left_join(pval_df) %>% select(-significant, -tval) sensitivity_output <- sens_mods %>% select(-p.value) %>% left_join(pval_df %>% select(p.value, p.adjust, effect, term, model_id, dataID, metric), na_matches = "never") %>% rename(sens_level = dataID) usethis::use_data(model_output) usethis::use_data(sensitivity_output) write_csv(sensitivity_output, here::here("paper", "sensitivity_models.csv"))
Write out table of model estimates for manuscript
#manuscript table metric_model_table %>% filter(model_id == "base", metric %in% c("S", "Jaccard_base", "SES_FRic", "SES_FEve", "SES_FDiv")) %>% select(-c(model_id, warning, statistic, df)) %>% readr::write_csv(here::here("figures", "man_model_table.csv")) #supplement of complete model output model_output %>% select(-c(n_ts, warning, lower.CL, upper.CL, REML, bobyqa)) %>% readr::write_csv(here::here("paper", "model_estimates_supp.csv"))
Is slope a function of time series characteristics?
library(multcomp) study_time_data <- model_data %>% dplyr::select(study_id, startyear, duration) %>% distinct() %>% group_by(study_id) %>% mutate(startyear = min(startyear), duration = max(duration)) %>% distinct() study_slope_data <- model_output %>% filter(!is.na(study_id), term == "year_scaled") %>% dplyr::select(study_id, metric, estimate) %>% left_join(study_time_data) duration_slope <- study_slope_data %>% group_nest(metric) %>% mutate(model = purrr::map(data, ~glm(estimate ~ duration, data = .x)), coef = purrr::map(model, tidy), ) %>% dplyr::select(metric, coef) %>% unnest(cols = c(coef)) %>% mutate(covariate = "duration") startyear_slope <- study_slope_data %>% group_nest(metric) %>% mutate(model = purrr::map(data, ~glm(estimate ~ startyear, data = .x)), coef = purrr::map(model, tidy), ) %>% dplyr::select(metric, coef) %>% unnest(cols = c(coef)) %>% mutate(covariate = "startyear") #All model estimates in one dataframe slope_models <- bind_rows(duration_slope, startyear_slope) # Which predictor variables were significant? slope_models %>% filter(term != "(Intercept)", p.value < 0.05) %>% View()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.