library(tidyverse) library(dplyr) library(FD) library(spData) library(sf) library(tmap) library(furrr) library(here) library(functional.diversity) #piggyback::pb_download(repo = "karinorman/functional_diversity", dest = here::here()) load(here::here("data", "trait.rda")) load(here::here("data", "bbs.rda")) min_year = 2006 #define the minimum year of sampling to include p <- st_crs(wkt='PROJCS["USA_Contiguous_Albers_Equal_Area_Conic", GEOGCS["GCS_North_American_1983", DATUM["North_American_Datum_1983", SPHEROID["GRS_1980",6378137,298.257222101]], PRIMEM["Greenwich",0], UNIT["Degree",0.017453292519943295]], PROJECTION["Albers_Conic_Equal_Area"], PARAMETER["False_Easting",0], PARAMETER["False_Northing",0], PARAMETER["longitude_of_center",-96], PARAMETER["Standard_Parallel_1",29.5], PARAMETER["Standard_Parallel_2",45.5], PARAMETER["latitude_of_center",37.5], UNIT["Meter",1], AUTHORITY["EPSG","102003"]]')
We want to be able to figure out if a site's Functional Diversity (FD) is significantly different than what we would expect for a site with that richness level in that region. First we must simulate a null model of the relationship between FD and species diversity for a given region.
Simulation functions
#site level FD with region assignments file <- here::here("data", "bbs_site_FD.rda") if (file.exists(file)){ load(file) }else{ bbs_site_FD <- get_complete_site_data() usethis::use_data(bbs_site_FD) } get_region_samples <- function(region_name){ region_data <- filter(bbs_site_FD, region == region_name) rich_levels <- unique(region_data$nbsp) species_pool <- bbs %>% filter(year > min_year & site_id %in% unique(region_data$site_id)) %>% select(scientific) %>% unique() #get column of unique scientific names get_samp_species <- function(richness){ samp_species <- sample(species_pool$scientific, richness) df <- cbind(richness, samp_species) return(df) } region_samples <- data.frame() for(i in rich_levels){region_samples <- rbind(region_samples, get_samp_species(i))} region_samples %>% mutate(value = 1, region = region_name) %>% spread(samp_species, value) #%>% #mutate(region = region_name) } #drop trait columns that are the same value for every observation, and therefore providing no additional info clean_trait_matrix <- function(trait_matrix){ uniq_vals <- sapply(trait_matrix, function(x){length(unique(x))}) #get number of unique values of each trait col_names <- names(which(uniq_vals == 1)) #find columns where there is only one value for all observations return(select(trait_matrix, -col_names)) } get_sample_FD <- function(x){ #get random sample for each region and richness level sample_occurrence <- map_dfr(unique(bbs_site_FD$region), get_region_samples) sample_species_mat <- select(sample_occurrence, -richness, -region) %>% #want only species x site info select(sort(current_vars())) #sort columns to be in alphabetical order, required for dbFDI() sample_trait_mat <- clean_trait_matrix(get_trait_matrix(colnames(sample_species_mat))) sample_fd <- as.data.frame(dbFD(x = sample_trait_mat, a = sample_species_mat, w.abun = FALSE)) df <- sample_occurrence %>% select(region, richness) %>% bind_cols(sample_fd) %>% mutate(merge_test = case_when( richness == nbsp ~ TRUE, TRUE ~ FALSE )) if(sum(df$merge_test) != dim(df)[1]) warning("richness and region columns may not have been appropriately joined with FD data") path <- paste0(here(), "/data/fd_samples/", x, "_tmp.tsv.bz2") write_tsv(df, path) pb_upload(path) }
Simulations
n <- 1000 #sample size if(!dir.exists(here::here("data", "fd_samples"))){ dir.create(paste0(here::here(), "/data/fd_samples")) system.time({out <- future_map(1:n, get_sample_FD)}) } sim <- purrr::map_dfr(fs::dir_ls(path = here::here("data", "fd_samples"), glob="*.tsv.bz2"), readr::read_tsv, .id = "sample") means <- sim %>% select(region, richness, FEve, FRic, FDis, RaoQ, starts_with("CWM")) %>% group_by(region, richness) %>% summarise_all(mean) %>% gather("metric", "mean", -c(region, richness)) sds <- sim %>% select(region, richness, FEve, FRic, FDis, RaoQ, starts_with("CWM")) %>% group_by(region, richness) %>% summarise_all(sd) %>% gather("metric", "sd", -c(region, richness)) stats <- left_join(means, sds, by = c("region", "richness", "metric")) %>% mutate(se = sd/sqrt(n), lowerCI = mean - qt(0.975, n - 1) * se, upperCI = mean + qt(0.975, n - 1) * se) usethis::use_data(stats)
Map of regions with bbs sites
bcr <- get_ecoreg_shp() #dissolve region polygons to get rid of state/providence boundaries bcr_regions <- lwgeom::st_make_valid(bcr) %>% #filter(COUNTRY %in% c("USA","CANADA")) %>% st_set_precision(-10000) %>% group_by(BCRNAME) %>% summarize() %>% filter(BCRNAME != "NOT RATED") %>% mutate(region_shape = case_when( BCRNAME == "APPALACHIAN MOUNTAINS" ~ TRUE, TRUE ~ FALSE )) #get basemap data("World") nam <- st_as_sf(World) %>% filter(continent == "North America", subregion == "Northern America") st_crs(nam) <- 54012 #map regions_ma <- #tm_shape(nam, projection = p$proj4string) + #tm_borders() + tm_shape(bcr_regions) + tm_borders(col = "black") + #tm_fill(col = "region_shape", palette = c("white", "sienna"), alpha = 0.25, legend.show = FALSE) + tm_shape(st_as_sf(bbs_site_FD)) + tm_dots(col = "nbsp", size = 0.05, palette = "Blues", title = "Richness") + tm_layout(legend.position = c("left", "bottom")) path <- paste0(here::here(), "/figures") if(!dir.exists(path)){ dir.create(path) } tmap_save(regions_ma, paste0(path, "/bbs_sites.jpeg"))
Example Hypervolume
master_traits <- get_trait_matrix() %>% rownames_to_column(var = "scientific") site_traits <- bbs %>% filter(site_id == 17016, year > min_year) %>% select(site_id, scientific) %>% unique() %>% left_join(master_traits, by = "scientific") %>% #select(-c(site_id, scientific, diet_5cat, pelagicspecialist, forstrat_speclevel, nocturnal, forstrat_watbelowsurf, forstrat_wataroundsurf)) %>% #remove categorical variables select(starts_with("diet"), bodymass_value, -diet_5cat) %>% scale(., center = TRUE, scale = TRUE) vol <- hypervolume(site_traits) plot(vol, show.3d = TRUE, show.legend = TRUE, names = c("seed", "", "",""), plot.3d.axes.id = c(7,9,11), contour.type = "ball", color = c("sienna", "black"))
Plot simulated and observed metric values for a given region.
plot_region_metrics <- function(region_name, metric_names){ region_data <- filter(bbs_site_FD, region == region_name) %>% select(-sing.sp) %>% gather(unique(stats$metric), key = "metric", value = "observed") %>% left_join(filter(stats, region == region_name), by = c("nbsp" = "richness", "metric", "region")) %>% mutate(significant = ifelse(observed > lowerCI & observed < upperCI, FALSE, TRUE), observed = as.numeric(observed)) lines <- region_data %>% filter(metric %in% metric_names) %>% ggplot(aes(x = nbsp)) + geom_line(aes(y = mean)) + geom_ribbon(aes(ymin = lowerCI, ymax = upperCI), alpha = 0.2) + geom_point(aes(y = observed, color = significant)) + scale_color_manual(values = c("#78B7C5", "chocolate")) + theme_classic() + facet_wrap(.~metric, scales = "free") + ggtitle(region_name) lines } plot_region_metrics(region_name = "APPALACHIAN MOUNTAINS", metric_names = c("FRic")) plot_region_metrics(region_name = "APPALACHIAN MOUNTAINS", metric_names = c("FDis", "FRic")) map(unique(bbs_site_FD$region), plot_region_metrics, metric_names = c("FDis", "FRic", "FEve"))
Plot just simulated Fric for a given region to see if it's fixed.
plot_fric <- function(region_name){ filter(stats, region == region_name) %>% filter(metric == "FRic") %>% ggplot(aes(x = richness)) + geom_line(aes(y = mean)) + geom_ribbon(aes(ymin = lowerCI, ymax = upperCI), alpha = 0.2) + ggtitle(region_name) } map(unique(bbs_site_FD$region)[1:18], plot_fric)
Plot of all null model lines
get_null_lines <- function(metric_name){ nulls <- stats %>% filter(metric %in% metric_name) %>% ggplot(aes(x = richness)) + geom_line(aes(y = mean, group = region)) + #geom_line(aes(y = mean, group = region, color = region)) + #facet_wrap(.~metric, scales = "free", ncol = 1) + theme_classic() + theme(legend.position = "none")+ ggtitle(metric_name) } map(c("FDis", "FRic", "FEve"), get_null_lines)
Plot one metric for all regions
plot_one_metric <- function(metric_name){ region_data <- bbs_site_FD %>% select(-sing.sp) %>% gather(unique(stats$metric), key = "metric", value = "observed") %>% left_join(stats, by = c("nbsp" = "richness", "metric", "region")) %>% mutate(significant = ifelse(observed > lowerCI & observed < upperCI, FALSE, TRUE), observed = as.numeric(observed)) lines <- region_data %>% filter(metric == metric_name) %>% ggplot(aes(x = nbsp)) + geom_line(aes(y = mean)) + geom_ribbon(aes(ymin = lowerCI, ymax = upperCI), alpha = 0.2) + geom_point(aes(y = observed, color = significant)) + scale_color_manual(values = c("chocolate", "#78B7C5")) + theme_classic() + facet_wrap(.~region, scales = "free") lines } plot_one_metric("FDis")
Evenness
plot_evenness <- function(regions){ region_data <- filter(bbs_site_FD, region %in% regions) %>% select(-sing.sp) %>% gather(unique(stats$metric), key = "metric", value = "observed") %>% left_join(stats, by = c("nbsp" = "richness", "metric", "region")) %>% mutate(significant = ifelse(observed > lowerCI & observed < upperCI, FALSE, TRUE), observed = as.numeric(observed)) lines <- region_data %>% filter(metric == "FEve") %>% ggplot(aes(x = nbsp)) + geom_line(aes(y = mean)) + geom_ribbon(aes(ymin = lowerCI, ymax = upperCI), alpha = 0.2) + geom_point(aes(y = observed, color = significant)) + scale_color_manual(values = c("chocolate", "#78B7C5")) + theme_classic() + facet_wrap(.~region, scales = "free") lines } plot_evenness(c("APPALACHIAN MOUNTAINS", "NORTHERN PACIFIC RAINFOREST", "GREAT BASIN"))
Models using local and region-level variables.
library(modeest) site_data <- read_tsv("data/site_variables.tsv.bz2") site_vars <- site_data %>% select(-year, -land_cover) %>% #exclude land_cover because we want to use a different summarising function na.omit() %>% group_by(site) %>% summarise_all(mean) %>% left_join(site_data %>% group_by(site) %>% summarise(., land_cover = sort(table(land_cover),decreasing=TRUE)[1]), by = "site") mod_df <- bbs_site_FD %>% select(-c(sing.sp, qual.FRic)) %>% rename(richness = nbsp) %>% gather(key = "metric", value = "observed", -c(richness, site_id, region, geometry)) %>% full_join(stats, by = c("richness", "region", "metric")) %>% mutate(observed = as.numeric(observed), metric_adj = (observed - mean)) %>% filter(metric != c("CWM.diet_5cat", "CWM.pelagicspecialist", "CWM.forstrat_speclevel", "CWM.nocturnal")) %>% left_join(site_vars, by = c("site_id" = "site")) %>% select(-site_id.y) %>% mutate(land_cover = as.factor(land_cover), site_id = as.factor(site_id)) %>% select(-c(mean, sd, se, lowerCI, upperCI)) %>% na.omit() %>% mutate_at(vars(-c(richness, site_id, region, geometry, metric, land_cover)), function(x){(x - mean(x)) / sd(x)})
library(GGally) pairplots <- mod_df %>% filter(metric == "FDis") %>% select(metric_adj:ndvi, -land_cover, -ends_with("yearly")) %>% ggpairs()
Functional dispersion as response variable
library(lme4) get_step_formula <- function(metric_name){ metric_df <- mod_df %>% filter(metric == metric_name) print(unique(metric_df$metric)) null <- glm(metric_adj ~ 1, data = metric_df) full <- glm(metric_adj ~ prcp..mm.day.samp + srad..W.m.2.samp + tmax..deg.c.samp + #tmin..deg.c.samp + land_cover + npp , data = metric_df) return(step(null, scope=list(lower=null, upper=full), direction="forward")) } fric_form <- get_step_formula("FRic") fdis_form <- get_step_formula("FDis") plot(fitted(mod1), residuals(mod1), xlab = "fitted", ylab = "residuals")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.