In this section, we will visualize the magnitude and direction of species-specific probability of occupancy.
# to load data library(readxl) # to handle data library(dplyr) library(readr) library(forcats) library(tidyr) library(purrr) library(stringr) # library(data.table) # to wrangle models source("R/fun_model_estimate_collection.r") source("R/fun_make_resp_data.r") # nice tables library(knitr) library(kableExtra) # plotting library(ggplot2) library(patchwork) source("R/fun_plot_interaction.r")
# list of species # Removing species after running a chi-square goodness of fit test species <- read_csv("data/species_list.csv") %>% filter(!scientific_name %in% c( "Treron affinis", "Prinia hodgsonii", "Pellorneum ruficeps", "Hypothymis azurea","Dendrocitta leucogastra","Chalcophaps indica", "Rubigula gularis", "Muscicapa dauurica", "Geokichla citrina", "Chrysocolaptes guttacristatus","Terpsiphone paradisi","Orthotomus sutorius", "Oriolus kundoo", "Dicrurus aeneus", "Cyornis tickelliae", "Copsychus fulicatus", "Oriolus xanthornus", "Alcippe poioicephala", "Ficedula nigrorufa","Dendrocitta vagabunda", "Dicrurus paradiseus", "Ocyceros griseus", "Psilopogon viridis", "Psittacula cyanocephala")) list_of_species <- as.character(species$scientific_name)
To get cumulative AIC weights, we first obtained a measure of relative importance of climatic and landscape predictors by calculating cumulative variable importance scores. These scores were calculated by obtaining the sum of model weights (AIC weights) across all models (including the top models) for each predictor across all species. We then calculated the mean cumulative variable importance score and a standard deviation for each predictor.
# which files to read file_names <- c("results/09_lc-clim-imp.xlsx") # read in sheets by species model_imp <- map(file_names, function(f) { md_list <- map(list_of_species, function(sn) { # some sheets are not found tryCatch( { readxl::read_excel(f, sheet = sn) %>% `colnames<-`(c("predictor", "AIC_weight")) %>% filter(str_detect(predictor, "psi")) %>% mutate( predictor = stringr::str_extract(predictor, pattern = stringr::regex("\\((.*?)\\)") ), predictor = stringr::str_replace_all(predictor, "[//(//)]", ""), predictor = stringr::str_remove(predictor, "\\.y") ) }, error = function(e) { message(as.character(e)) } ) }) names(md_list) <- list_of_species return(md_list) })
# bind rows model_imp <- map(model_imp, bind_rows) %>% bind_rows() # convert to numeric model_imp$AIC_weight <- as.numeric(model_imp$AIC_weight) # Let's get a summary of cumulative variable importance model_imp <- group_by(model_imp, predictor) %>% summarise( mean_AIC = mean(AIC_weight), sd_AIC = sd(AIC_weight), min_AIC = min(AIC_weight), max_AIC = max(AIC_weight), med_AIC = median(AIC_weight) ) # write to file write_csv(model_imp, "results/10_cumulative-AIC-weights.csv")
Read data back in.
# read data and make factor model_imp <- read_csv("results/10_cumulative-AIC-weights.csv") model_imp$predictor <- as_factor(model_imp$predictor)
# make nice names predictor_name <- tibble( predictor = levels(model_imp$predictor), pred_name = c( "Precipitation seasonality", "Temperature seasonality", "% Evergreen Forest", "% Deciduous Forest", "% Mixed/Degraded Forest", "% Agriculture/Settlements", "% Grassland", "% Plantations", "% Water Bodies" ) ) # rename predictor model_imp <- left_join(model_imp, predictor_name)
Prepare figure for cumulative AIC weight. Figure code is hidden in versions rendered as HTML and PDF.
fig_aic <- ggplot(model_imp) + geom_pointrange( aes( x = reorder(predictor, mean_AIC), y = mean_AIC, ymin = mean_AIC - sd_AIC, ymax = mean_AIC + sd_AIC ) ) + geom_text(aes( x = predictor, y = 0.5, label = pred_name ), size = 3, angle = 0, hjust = 0.5, vjust = 2 ) + # scale_y_continuous(breaks = seq(45, 75, 10))+ scale_x_discrete(labels = NULL) + # scale_color_brewer(palette = "RdBu", values = c(0.5, 1))+ coord_flip( ylim = c(0, 1) # ylim = c(45, 75) ) + theme_test() + theme(legend.position = "none") + labs( x = "Predictor", y = "Cumulative AIC weight" ) ggsave(fig_aic, filename = "figs/fig_aic_weight.png", device = png(), dpi = 300, width = 79, height = 120, units = "mm" )
For each species, we examined those models which had ΔAICc < 4, as these top models were considered to explain a large proportion of the association between the species-specific probability of occupancy and environmental drivers. Using these restricted model sets for each species; we created a model-averaged coefficient estimate for each predictor and assessed its direction and significance. We considered a predictor to be significantly associated with occupancy if the range of the 95% confidence interval around the model-averaged coefficient did not contain zero.
file_read <- c("results/09_lc-clim-modelEst.xlsx") # read data as list column model_est <- map(list_of_species, function(sn) { tryCatch( { readxl::read_excel(file_read, sheet = sn) %>% rename(predictor = "...1") %>% filter(str_detect(predictor, "psi")) %>% mutate( predictor = stringr::str_extract(predictor, pattern = stringr::regex("\\((.*?)\\)") ), predictor = stringr::str_replace_all(predictor, "[//(//)]", ""), predictor = stringr::str_remove(predictor, "\\.y") ) }, error = function(e) { message(as.character(e)) } ) }) # assign names names(model_est) <- list_of_species # prepare model data model_data <- tibble( scientific_name = list_of_species ) # remove null data model_est <- keep(model_est, .p = function(x) !is.null(x)) # rename model data components and separate predictors names <- c( "predictor", "coefficient", "se", "ci_lower", "ci_higher", "z_value", "p_value" ) # get data for plotting: model_est <- map(model_est, function(df) { colnames(df) <- names # df <- separate_interaction_terms(df) # df <- make_response_data(df) return(df) }) # add names and scales model_est <- imap(model_est, function(.x, .y) { mutate(.x, scientific_name = .y) }) # remove modulators model_est <- bind_rows(model_est) %>% dplyr::select(-matches("modulator")) # join data to species name model_data <- model_data %>% left_join(model_est) # Keep only those predictors whose p-values are significant: model_data <- model_data %>% filter(p_value < 0.05) %>% filter(predictor != "Int")
Export predictor effects.
# get predictor effect data data_predictor_effect <- distinct( model_data, scientific_name, se, predictor, coefficient ) # write to file write_csv(data_predictor_effect, "results/10_data-predictor-effect.csv")
Export model data.
model_data_to_file <- model_data %>% dplyr::select( predictor, scientific_name ) # remove .y model_data_to_file <- model_data_to_file %>% mutate(predictor = str_remove(predictor, "\\.y")) write_csv( model_data_to_file, "results/10_data-occupancy-predictors.csv" )
Read in data after clearing R session.
# first merge species trait data with significant predictor species_trait <- read.csv("data/species-trait-dat.csv") sig_predictor <- read.csv("results/10_data-predictor-effect.csv") merged_species_traits <- inner_join(sig_predictor, species_trait, by = c("scientific_name" = "scientific_name") ) write_csv( merged_species_traits, "results/10_results-predictors-species-traits.csv" ) # read from file model_data <- read_csv("results/10_results-predictors-species-traits.csv")
Fix predictor name.
# remove .y from predictors model_data <- model_data %>% mutate_at(.vars = c("predictor"), .funs = function(x) { stringr::str_remove(x, ".y") })
# is the coeff positive? how many positive per scale per predictor per axis of split? # now splitting by habitat --- forest or open country data_predictor <- mutate(model_data, direction = coefficient > 0 ) %>% filter(predictor != "Int", predictor != "Ibio4^2" & # If you had squared terms predictor != "Ibio15^2") %>% rename(habitat = "Habitat.type") %>% count( predictor, habitat, direction ) %>% mutate(mag = n * (if_else(direction, 1, -1))) # wrangle data to get nice bars data_predictor <- data_predictor %>% dplyr::select(-n) %>% drop_na(direction) %>% mutate(direction = ifelse(direction, "positive", "negative")) %>% pivot_wider(values_from = "mag", names_from = "direction") %>% mutate_at( vars(positive, negative), ~ if_else(is.na(.), 0, .) ) data_predictor_long <- data_predictor %>% pivot_longer( cols = c("negative", "positive"), names_to = "effect", values_to = "magnitude" ) # write write_csv( data_predictor_long, "results/10_data-predictor-direction-nSpecies.csv" )
Prepare data to determine the direction (positive or negative) of the effect of each predictor. How many species are affected in either direction?
# join with predictor names and relative AIC data_predictor_long <- left_join(data_predictor_long, model_imp)
Prepare figure of the number of species affected in each direction. Figure code is hidden in versions rendered as HTML and PDF.
# habitat labels labels <- c( "Generalist" = "Generalist Birds", "Forest" = "Forest Birds" ) fig_predictor <- ggplot(model_imp) + geom_hline( yintercept = 0, lwd = 0.2, col = "grey" ) + geom_col( data = data_predictor_long, aes( x = reorder(predictor, mean_AIC), y = magnitude, fill = effect ), # position = position_dodge(width = 1), width = 0.3 ) + geom_text(aes( x = predictor, y = 0, label = pred_name ), angle = 0, vjust = 2, size = 3 ) + geom_text( data = tibble( x = c(5), y = c(-30, 30), label = c("Negative effect", "Positive effect") ), aes( x, y, label = label ), angle = 90 ) + scale_fill_discrete_diverging( palette = "Berlin", l1 = 50, rev = T ) + scale_x_discrete(labels = NULL) + scale_y_continuous( labels = abs, limits = c(-30, 30) ) + # uncomment below to split by habitat facet_grid(~habitat, labeller = labeller(habitat = labels)) + coord_flip() + theme_test() + theme(legend.position = "none") + labs(x = "Environmental Covariate", y = "# Species") ggsave(fig_predictor, filename = "figs/fig_04.png", dpi = 300, width = 100, height = 120, units = "mm" )
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.