knitr::opts_chunk$set(echo = TRUE)
library(tidyverse) library(furrr) library(FD) library(biodivTS) source(here::here("R", "dbFD_mine.R")) pins::board_register_github(repo = "karinorman/biodivTS", branch = "master")
Get FD metrics for each study
FD function:
get_fd <- function(obs_data, filter_plot, ...){ require(FD) data <- obs_data %>% filter(plot_id == filter_plot) fs::dir_create(paste0(here::here("data"), "/fd_attempt")) write_tsv(data.frame(), paste0(here::here("data"), "/fd_attempt/", filter_plot, ".tsv")) # #check which column the abundaces weights are stored in (abundance or biomass) # if (sum(data$year_abun) == 0) {weight <- "year_biomass"} else {weight <- "year_abun"} species_mat <- data %>% select(year, id, weight) %>% arrange(id) %>% pivot_wider(names_from = id, values_from = weight, values_fill = 0) years <- select(species_mat, year) trait_mat <- data %>% select(-c(biotimeName, taxa, id_species, plot, year, weight, traitName, scientificName, plot_id, study_id) ) %>% distinct() %>% select_if(~ length(unique(na.omit(.))) > 1) #remove columns that have the same value for all columns #get binary variables so they can be excluded from rescaling bin_vars <- map(trait_mat, ~ all(na.omit(.) %in% 0:1)) trait_mat <- trait_mat %>% mutate_at(vars(-c(id, names(bin_vars[bin_vars == TRUE])), -ends_with("5cat")), list(~as.numeric(scale(.)))) %>% #rescale variables, not binary or categorical arrange(id) %>% column_to_rownames("id") fs::dir_create(paste0(here::here("data"), "/fd_output")) #the convexhull c code creates an output file of vertices, in order to not have multiple threads writing #to the same file we have to create individual working directories tmp_dir <- paste0(here::here("data", "fd_output", "tmp"), "/tmp_", filter_plot) fs::dir_create(tmp_dir) setwd(tmp_dir) fd_data <- dbFD_mine(trait_mat, select(species_mat, -year), w.abun = TRUE, ...) fd_out <- as_tibble(fd_data$fd_met)[,1:8] %>% cbind(years, .) %>% mutate(plot_id = unlist(filter_plot), trait_count = dim(trait_mat)[2]) write_tsv(fd_out, paste0(here::here("data"), "/fd_output/", filter_plot, "_fd.tsv")) write_tsv(fd_data$pca_traits, paste0(here::here("data"), "/fd_output/", filter_plot, "_traits.tsv")) setwd(here::here()) print(filter_plot) #return(fd_out) } get_fd_safe <- possibly(get_fd, NA)
First pass at calculating FD metrics from FD package:
data <- pins::pin_get("biotime-traits", board = "github") plots <- data %>% pull(plot_id) %>% unique() system.time(fd <- map(plots, get_fd_safe, obs_data = data, m = "min", corr = "cailliez"))
The dataset is too big to copy to each parallel thread, so we split the dataset into chunks
#number of chunks to split original dataset into num_groups = 4 plot_sizes <- data %>% count(plot_id) %>% arrange(desc(n)) %>% mutate(group = rep_len(1:num_groups, dim(.)[1])) get_parallel_FD <- function(df, group_num){ group_plots <- plot_sizes %>% filter(group == group_num) group_data <- df %>% filter(plot_id %in% group_plots$plot_id) future::plan("multiprocess", workers = 4) fd <- future_map(group_plots$plot_id, get_fd_safe, obs_data = group_data, m = "min", corr = "cailliez") %>% rbind() print(group_num) return(fd) } fd <- map(1:num_groups, get_parallel_FD, df = data)
Read data back in, excluding sites/plots as they become a problem:
files <- dir(here::here("data", "fd_output"), "*_fd.tsv") fd_data <- files %>% paste0(here::here("data", "fd_output"), "/", .) %>% map_dfr(~read_tsv(.x, col_types = "dddddddddcd")) #%>% mutate(plot_id = as.character(plot_id))) remain <- data %>% filter(!plot_id %in% fd_data$plot_id) plot_sizes <- remain %>% count(plot_id) get_fd_error <- safely(get_fd) system.time(fd <- map(plot_sizes$plot_id, get_fd_error, obs_data = data, m = "min", corr = "cailliez")) system.time(fd <- map(plot_sizes$plot_id, get_fd_safe, obs_data = data, m = "min", corr = "cailliez"))
Format FD table with null values
null_table <- pins::pin_get("null-table", board = "github") fd_table <- fd_data %>% select(year, plot_id, richness = nbsp, everything(), -sing.sp) %>% gather(metric, value, contains("F"), RaoQ) %>% left_join(biotime_traits %>% select(plot_id, study_id, plot) %>% distinct()) %>% left_join(null_table %>% select(plot_id, year, richness, metric, mean, sd), by = c("year", "plot_id", "richness", "metric")) %>% mutate(SES = (value - mean)/sd) usethis::use_data(fd_table) pins::pin(fd_table, board = "github")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.