library(tidyverse)
library(dplyr)
library(FD)
library(spData)
library(sf)
library(tmap)
library(furrr)
library(here)
library(functional.diversity)


#piggyback::pb_download(repo = "karinorman/functional_diversity", dest = here::here())

load(here::here("data", "trait.rda"))
load(here::here("data", "bbs.rda"))


min_year = 2006 #define the minimum year of sampling to include

p <- st_crs(wkt='PROJCS["USA_Contiguous_Albers_Equal_Area_Conic",
    GEOGCS["GCS_North_American_1983",
            DATUM["North_American_Datum_1983",
            SPHEROID["GRS_1980",6378137,298.257222101]],
            PRIMEM["Greenwich",0],
            UNIT["Degree",0.017453292519943295]],
            PROJECTION["Albers_Conic_Equal_Area"],
            PARAMETER["False_Easting",0],
            PARAMETER["False_Northing",0],
            PARAMETER["longitude_of_center",-96],
            PARAMETER["Standard_Parallel_1",29.5],
            PARAMETER["Standard_Parallel_2",45.5],
            PARAMETER["latitude_of_center",37.5],
            UNIT["Meter",1],
            AUTHORITY["EPSG","102003"]]')

We want to be able to figure out if a site's Functional Diversity (FD) is significantly different than what we would expect for a site with that richness level in that region. First we must simulate a null model of the relationship between FD and species diversity for a given region.

Simulation functions

#site level FD with region assignments
file <- here::here("data", "bbs_site_FD.rda")
if (file.exists(file)){
  load(file)
}else{
  bbs_site_FD <- get_complete_site_data()
  usethis::use_data(bbs_site_FD)
}

get_region_samples <- function(region_name){
  region_data <- filter(bbs_site_FD, region == region_name)
  rich_levels <- unique(region_data$nbsp)

  species_pool <- bbs %>% 
    filter(year > min_year & site_id %in% unique(region_data$site_id)) %>%
    select(scientific) %>%
    unique() #get column of unique scientific names

  get_samp_species <- function(richness){
    samp_species <- sample(species_pool$scientific, richness)
    df <- cbind(richness, samp_species)
    return(df)
  }

  region_samples <- data.frame()
  for(i in rich_levels){region_samples <- rbind(region_samples, get_samp_species(i))}
  region_samples %>%
    mutate(value = 1, region = region_name) %>%
    spread(samp_species, value) #%>%
    #mutate(region = region_name)

}

#drop trait columns that are the same value for every observation, and therefore providing no additional info
clean_trait_matrix <- function(trait_matrix){
  uniq_vals <- sapply(trait_matrix, function(x){length(unique(x))}) #get number of unique values of each trait
  col_names <- names(which(uniq_vals == 1)) #find columns where there is only one value for all observations
  return(select(trait_matrix, -col_names))
}

get_sample_FD <- function(x){
  #get random sample for each region and richness level
  sample_occurrence <- map_dfr(unique(bbs_site_FD$region), get_region_samples) 

  sample_species_mat <- select(sample_occurrence, -richness, -region) %>% #want only species x site info
    select(sort(current_vars())) #sort columns to be in alphabetical order, required for dbFDI()
  sample_trait_mat <- clean_trait_matrix(get_trait_matrix(colnames(sample_species_mat)))

  sample_fd <- as.data.frame(dbFD(x = sample_trait_mat, a = sample_species_mat, w.abun = FALSE))

  df <- sample_occurrence %>%
    select(region, richness) %>%
    bind_cols(sample_fd) %>%
    mutate(merge_test = case_when(
      richness == nbsp ~ TRUE,
      TRUE ~ FALSE
    ))

  if(sum(df$merge_test) != dim(df)[1]) warning("richness and region columns may not have been appropriately joined with FD data")

  path <- paste0(here(), "/data/fd_samples/", x, "_tmp.tsv.bz2")
  write_tsv(df, path)
  pb_upload(path)
}

Simulations

n <- 1000 #sample size

if(!dir.exists(here::here("data", "fd_samples"))){
  dir.create(paste0(here::here(), "/data/fd_samples"))
  system.time({out <- future_map(1:n, get_sample_FD)})
}

sim <- purrr::map_dfr(fs::dir_ls(path = here::here("data", "fd_samples"), glob="*.tsv.bz2"), readr::read_tsv, .id = "sample")

means <- sim %>%
  select(region, richness, FEve, FRic, FDis, RaoQ, starts_with("CWM")) %>%
  group_by(region, richness) %>%
  summarise_all(mean) %>%
  gather("metric", "mean", -c(region, richness))

sds <- sim %>%
  select(region, richness, FEve, FRic, FDis, RaoQ, starts_with("CWM")) %>%
  group_by(region, richness) %>%
  summarise_all(sd) %>%
  gather("metric", "sd", -c(region, richness))

stats <- left_join(means, sds, by = c("region", "richness", "metric")) %>%
  mutate(se = sd/sqrt(n),
         lowerCI = mean - qt(0.975, n - 1) * se, 
         upperCI = mean + qt(0.975, n - 1) * se)  

usethis::use_data(stats)

Map of regions with bbs sites

bcr <- get_ecoreg_shp()

#dissolve region polygons to get rid of state/providence boundaries
bcr_regions <- lwgeom::st_make_valid(bcr) %>%
  #filter(COUNTRY %in% c("USA","CANADA")) %>%
  st_set_precision(-10000) %>%
  group_by(BCRNAME) %>%
  summarize() %>%
  filter(BCRNAME != "NOT RATED") %>%
   mutate(region_shape = case_when(
    BCRNAME == "APPALACHIAN MOUNTAINS" ~ TRUE,
    TRUE ~ FALSE
  ))

#get basemap
data("World")
nam <- st_as_sf(World) %>% 
  filter(continent == "North America", subregion == "Northern America")
st_crs(nam) <- 54012

#map
regions_ma <- #tm_shape(nam, projection = p$proj4string) +
  #tm_borders() +
tm_shape(bcr_regions) +
  tm_borders(col = "black") +
  #tm_fill(col = "region_shape", palette = c("white", "sienna"), alpha = 0.25, legend.show = FALSE) +
tm_shape(st_as_sf(bbs_site_FD)) +
  tm_dots(col = "nbsp", size = 0.05, palette = "Blues", title = "Richness") +
tm_layout(legend.position = c("left", "bottom"))

path <- paste0(here::here(), "/figures")
if(!dir.exists(path)){
  dir.create(path)
}

tmap_save(regions_ma, paste0(path, "/bbs_sites.jpeg"))

Example Hypervolume

master_traits <- get_trait_matrix() %>%
  rownames_to_column(var = "scientific")

site_traits <- bbs %>%
  filter(site_id == 17016, year > min_year) %>%
  select(site_id, scientific) %>%
  unique() %>%
  left_join(master_traits, by = "scientific") %>%
  #select(-c(site_id, scientific, diet_5cat, pelagicspecialist, forstrat_speclevel, nocturnal, forstrat_watbelowsurf, forstrat_wataroundsurf)) %>% #remove categorical variables
  select(starts_with("diet"), bodymass_value, -diet_5cat) %>%
  scale(., center = TRUE, scale = TRUE)

vol <- hypervolume(site_traits)

plot(vol, show.3d = TRUE, show.legend = TRUE, 
     names = c("seed", "", "",""), plot.3d.axes.id = c(7,9,11),
     contour.type = "ball",
     color = c("sienna", "black"))

Plot simulated and observed metric values for a given region.

plot_region_metrics <- function(region_name, metric_names){

  region_data <- filter(bbs_site_FD, region == region_name) %>%
    select(-sing.sp) %>%
    gather(unique(stats$metric), key = "metric", value = "observed") %>%
    left_join(filter(stats, region == region_name), by = c("nbsp" = "richness", "metric", "region")) %>%
    mutate(significant = ifelse(observed > lowerCI & observed < upperCI, FALSE, TRUE), 
           observed = as.numeric(observed))

  lines <- region_data %>%
    filter(metric %in% metric_names) %>%
    ggplot(aes(x = nbsp)) + 
    geom_line(aes(y = mean)) +
    geom_ribbon(aes(ymin = lowerCI, ymax = upperCI), alpha = 0.2) +
    geom_point(aes(y = observed, color = significant)) +
    scale_color_manual(values = c("#78B7C5", "chocolate")) +
    theme_classic() +
    facet_wrap(.~metric, scales = "free") +
    ggtitle(region_name)

  lines
}

plot_region_metrics(region_name = "APPALACHIAN MOUNTAINS", metric_names = c("FRic"))
plot_region_metrics(region_name = "APPALACHIAN MOUNTAINS", metric_names = c("FDis", "FRic"))

map(unique(bbs_site_FD$region), plot_region_metrics, 
    metric_names = c("FDis", "FRic", "FEve"))

Plot just simulated Fric for a given region to see if it's fixed.

plot_fric <- function(region_name){
  filter(stats, region == region_name) %>%
    filter(metric == "FRic") %>%
    ggplot(aes(x = richness)) + 
    geom_line(aes(y = mean)) +
    geom_ribbon(aes(ymin = lowerCI, ymax = upperCI), alpha = 0.2) +
    ggtitle(region_name)
}

map(unique(bbs_site_FD$region)[1:18], plot_fric)

Plot of all null model lines

get_null_lines <- function(metric_name){
nulls <- stats %>%
  filter(metric %in% metric_name) %>%
  ggplot(aes(x = richness)) + 
  geom_line(aes(y = mean, group = region)) +
  #geom_line(aes(y = mean, group = region, color = region)) +
  #facet_wrap(.~metric, scales = "free", ncol = 1) +
  theme_classic() +
  theme(legend.position = "none")+
  ggtitle(metric_name)
}

map(c("FDis", "FRic", "FEve"), get_null_lines)

Plot one metric for all regions

plot_one_metric <- function(metric_name){

  region_data <- bbs_site_FD %>%
    select(-sing.sp) %>%
    gather(unique(stats$metric), key = "metric", value = "observed") %>%
    left_join(stats, by = c("nbsp" = "richness", "metric", "region")) %>%
    mutate(significant = ifelse(observed > lowerCI & observed < upperCI, FALSE, TRUE), 
           observed = as.numeric(observed))

  lines <- region_data %>%
    filter(metric == metric_name) %>%
    ggplot(aes(x = nbsp)) + 
    geom_line(aes(y = mean)) +
    geom_ribbon(aes(ymin = lowerCI, ymax = upperCI), alpha = 0.2) +
    geom_point(aes(y = observed, color = significant)) +
    scale_color_manual(values = c("chocolate", "#78B7C5")) +
    theme_classic() +
    facet_wrap(.~region, scales = "free")

  lines
}

plot_one_metric("FDis")

Evenness

plot_evenness <- function(regions){

  region_data <- filter(bbs_site_FD, region %in% regions) %>%
    select(-sing.sp) %>%
    gather(unique(stats$metric), key = "metric", value = "observed") %>%
    left_join(stats, by = c("nbsp" = "richness", "metric", "region")) %>%
    mutate(significant = ifelse(observed > lowerCI & observed < upperCI, FALSE, TRUE), 
           observed = as.numeric(observed))

  lines <- region_data %>%
    filter(metric == "FEve") %>%
    ggplot(aes(x = nbsp)) + 
    geom_line(aes(y = mean)) +
    geom_ribbon(aes(ymin = lowerCI, ymax = upperCI), alpha = 0.2) +
    geom_point(aes(y = observed, color = significant)) +
    scale_color_manual(values = c("chocolate", "#78B7C5")) +
    theme_classic() +
    facet_wrap(.~region, scales = "free")

  lines
}

plot_evenness(c("APPALACHIAN MOUNTAINS", "NORTHERN PACIFIC RAINFOREST", "GREAT BASIN"))

Models using local and region-level variables.

library(modeest)
site_data <- read_tsv("data/site_variables.tsv.bz2")

site_vars <- site_data %>%
  select(-year, -land_cover) %>% #exclude land_cover because we want to use a different summarising function
  na.omit() %>%
  group_by(site) %>%
  summarise_all(mean) %>%
  left_join(site_data %>% group_by(site) %>% 
              summarise(., land_cover = sort(table(land_cover),decreasing=TRUE)[1]), by = "site")



mod_df <- bbs_site_FD %>% 
  select(-c(sing.sp, qual.FRic)) %>%
  rename(richness = nbsp) %>%
  gather(key = "metric", value = "observed", -c(richness, site_id, region, geometry)) %>%
  full_join(stats, by = c("richness", "region", "metric")) %>%
  mutate(observed = as.numeric(observed), metric_adj = (observed - mean)) %>%
  filter(metric != c("CWM.diet_5cat", "CWM.pelagicspecialist", "CWM.forstrat_speclevel", "CWM.nocturnal")) %>%
  left_join(site_vars, by = c("site_id" = "site")) %>%
  select(-site_id.y) %>%
  mutate(land_cover = as.factor(land_cover), site_id = as.factor(site_id)) %>%
  select(-c(mean, sd, se, lowerCI, upperCI)) %>%
  na.omit() %>%
  mutate_at(vars(-c(richness, site_id, region, geometry, metric, land_cover)), 
            function(x){(x - mean(x)) / sd(x)})
library(GGally)

pairplots <- mod_df %>%
  filter(metric == "FDis") %>%
  select(metric_adj:ndvi, -land_cover, -ends_with("yearly")) %>%
  ggpairs()

Functional dispersion as response variable

library(lme4)

get_step_formula <- function(metric_name){
  metric_df <- mod_df %>% filter(metric == metric_name)

  print(unique(metric_df$metric))

  null <- glm(metric_adj ~ 1, data = metric_df)  
  full <- glm(metric_adj ~ prcp..mm.day.samp + srad..W.m.2.samp + tmax..deg.c.samp + 
                #tmin..deg.c.samp + 
                land_cover + npp , data = metric_df)

  return(step(null, scope=list(lower=null, upper=full), direction="forward"))
}

fric_form <- get_step_formula("FRic")
fdis_form <- get_step_formula("FDis")

plot(fitted(mod1), residuals(mod1), xlab = "fitted", ylab = "residuals")


karinorman/functional_diversity documentation built on March 6, 2020, 2:46 p.m.