```r
knitr::opts_chunk$set(echo = TRUE)
```
This script draws n null samples for each rarefied sample, calculates the metrics for those null samples, and outputs a table of the null model statistics.
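As a minimal sketch of the null model implemented below (the tiny species pool and one-year community here are invented for illustration, not project data): for each year, species identities are redrawn at random from the site-level species pool while the observed richness and the observed set of abundances are kept fixed.

```r
library(dplyr)

# hypothetical species pool and a single year of observations (illustration only)
species_pool <- c("ITIS_1", "ITIS_2", "ITIS_3", "ITIS_4", "ITIS_5")
obs <- tibble(species = c("ITIS_1", "ITIS_3"), weight = c(10, 2), year = 2000)

set.seed(1)
# one null draw: same richness and abundances, randomized species identities
null_draw <- tibble(
  species = sample(species_pool, n_distinct(obs$species)),
  weight  = sample(obs$weight),
  year    = unique(obs$year)
)
null_draw
```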
```r
library(tidyverse)
library(FD)
library(furrr)
```
## Get dataframe of filenames and corresponding rarefyIDs
```r
#get list of files
path <- here::here("data", "rarefied_samples")
files <- list.files(path) %>% paste0(path, "/", .)
```
## Get species pools for each timeseries
```r
load(here::here("data/bt_traitfiltered.rda"))
load(here::here("data/trait_ref.rda"))

#for each rarefyID, get complete list of species
species_pools <- bt_traitfiltered %>%
  select(rarefyid, species) %>%
  distinct()

rm(bt_traitfiltered)
```
## Simulation functions
```r
#for each file, sample n times; each file is data for one community
get_plot_sample <- function(file, n){
  #print(filter_id)
  load(file) #loads file called rare_comm_save

  #put in matrix form
  comm <- rare_comm_save %>%
    pivot_longer(cols = starts_with("ITIS"), names_to = "species", values_to = "weight") %>%
    filter(weight != 0) %>%
    rename_with(tolower)

  #get rarefyid and resample number
  filter_rarefyid <- unique(comm$rarefyid) %>% unlist()
  samp_rarefy_resamp <- unique(comm$rarefy_resamp) %>% unlist()
  print(filter_rarefyid)

  #check that there are at least two species for all years, otherwise stop sampling
  species_count <- comm %>%
    select(year, species) %>%
    count(year)
  if (min(species_count$n) < 2){
    save_dir <- here::here("data", "no_null_sample")
    fs::dir_create(save_dir)
    write.csv(rare_comm_save, paste0(save_dir, "/", filter_rarefyid, "_samp", samp_rarefy_resamp, ".csv"))
    return()
  }

  #only species occurring at that rarefyid should be in the species pool
  species_pool <- species_pools %>%
    filter(rarefyid == filter_rarefyid) %>%
    pull(species) %>%
    na.omit() %>%
    unique() #get column of unique scientific names

  #function to get sample species and abundances
  get_samp_species <- function(data){
    samp_species <- sample(species_pool, n_distinct(data$species))
    samp_abun <- sample(data$weight)
    df <- as_tibble(cbind(samp_species, samp_abun)) %>%
      mutate(year = unique(data$year))
    return(df)
  }

  #loop over the number of samples
  site_samples <- data.frame()
  for (j in 1:n){
    iter_samples <- data.frame()
    #for each year get a sample species list
    for(i in unique(comm$year)){
      year_data <- comm %>% filter(year == i)
      iter_samples <- rbind(iter_samples, get_samp_species(year_data))
    }
    site_samples <- rbind(site_samples, iter_samples %>% mutate(n = j))
  }

  plot_samples <- site_samples %>%
    mutate(rarefyid = filter_rarefyid, rarefy_resamp = samp_rarefy_resamp) %>%
    pivot_wider(names_from = samp_species, values_from = samp_abun)

  #save out samples
  save_dir <- here::here("data", "null_samples")
  fs::dir_create(save_dir)
  write.csv(plot_samples, paste0(save_dir, "/", filter_rarefyid, "_samp", samp_rarefy_resamp, ".csv"))
  return(plot_samples)
}
```
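Before launching the full parallel run in the next chunk, it can be worth testing the function serially on a single file with a small number of draws (a quick sanity check; it assumes at least one file exists in `data/rarefied_samples`):

```r
# quick serial test: 5 null draws for the first rarefied-sample file
test_sample <- get_plot_sample(files[[1]], n = 5)
dplyr::glimpse(test_sample)
```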
## Get sample dataframe
```r
#get null samples for each rarefied sample
#use multisession workers (multiprocess is deprecated in current {future} releases)
future::plan(multisession, workers = 30)
sample_occurrence <- future_map(files, get_plot_sample, n = 500, .options = furrr_options(seed = TRUE))
```
## Figure out which samples didn't get null samples
```r
null_files <- list.files(here::here("data", "null_samples")) %>%
  paste0(here::here("data", "null_samples"), "/", .)

nulls <- as.data.frame(null_files) %>%
  mutate(store_nums = str_extract_all(null_files, "[0-9]+")) %>%
  unnest_wider(store_nums) %>%
  unite(rarefyid, `...1`, `...2`, sep = "_") %>%
  rename(rarefy_resamp = `...3`) %>%
  mutate(rarefy_files = paste0(path, "/count_cell", rarefyid, "_sample", rarefy_resamp, ".rda"))

# null_samples <- map_dfr(null_files, ~read_csv(.x) %>% select(rarefyid, rarefy_resamp) %>% distinct()) %>%
#   mutate(rarefy_samp_file = paste0(path, "/count_cell", rarefyid, "_sample", rarefy_resamp, ".rda"))

missing_files <- setdiff(files, nulls$rarefy_files)

future::plan(multisession)
sample_occurrence <- future_map(missing_files, get_plot_sample, n = 500, .options = furrr_options(seed = TRUE)) %>%
  bind_rows() %>%
  replace(is.na(.), 0)
```
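The number extraction above assumes null-sample filenames of the form `<rarefyid>_samp<resamp>.csv`, as written by `get_plot_sample()`, with rarefyIDs made of two numeric parts (hence the `unite()` of `...1` and `...2`), and that the directory path itself contains no digits. A toy check on an invented filename shows what `str_extract_all()` returns:

```r
# invented example filename in the format written by get_plot_sample()
example_file <- "339_584025_samp12.csv"
stringr::str_extract_all(example_file, "[0-9]+")
# after unnest_wider(): ...1 = "339", ...2 = "584025", ...3 = "12",
# recombined as rarefyid = "339_584025" and rarefy_resamp = "12"
```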
## Calculate FD metrics for all the null samples
```r
get_nullplot_FD <- function(file_path, traits, ...) {
  library(FD)
  plot_samples <- read_csv(file_path) %>% replace(is.na(.), 0)
  plot_samples %>% select(rarefyid, rarefy_resamp) %>% distinct() %>% print()

  #get species matrix
  sample_species_mat <- plot_samples %>%
    select(starts_with("ITIS")) %>% #want only species x site info
    select(sort(tidyselect::peek_vars())) #sort columns alphabetically, required by dbFD()

  #get trait matrix
  trait_mat <- get_traitMat(colnames(sample_species_mat), trait_data = traits)

  save_dir <- here::here("data", "fd_null_output")
  fs::dir_create(save_dir)

  get_FD_safe <- possibly(get_FD, otherwise = tibble())

  #check if data are counts or abundances, then calculate FD
  max_val <- max(unlist(map(sample_species_mat, max)))
  if(max_val > 1){
    FD_mets <- get_FD_safe(species_mat = sample_species_mat, trait_mat = trait_mat,
                           year_list = plot_samples$year,
                           data_id = unique(plot_samples$rarefyid),
                           samp_id = unique(plot_samples$rarefy_resamp),
                           w.abun = TRUE, m = "min", corr = "cailliez")
  }else{
    FD_mets <- get_FD_safe(species_mat = sample_species_mat, trait_mat = trait_mat,
                           year_list = plot_samples$year,
                           data_id = unique(plot_samples$rarefyid),
                           samp_id = unique(plot_samples$rarefy_resamp),
                           w.abun = FALSE, m = "min", corr = "cailliez")
  }

  sample_fd <- FD_mets$fd_met %>%
    cbind(plot_samples %>% select(rarefyid, rarefy_resamp, n))

  setwd(here::here())
  write_tsv(sample_fd, paste0(save_dir, "/null_rarefy", unique(plot_samples$rarefyid),
                              "samp", unique(plot_samples$rarefy_resamp), ".tsv"))
  return(sample_fd)
}

#get list of null sample files
null_files <- dir(here::here("data", "null_samples"), "*.csv") %>%
  paste0(here::here("data", "null_samples"), "/", .)

#to run this in parallel, the biodivTS package must be installed (devtools::install()) so workers can find get_FD() and get_traitMat()
plan(multisession, workers = 30)
null_FD <- future_map_dfr(null_files, ~get_nullplot_FD(.x, traits = trait_ref), .options = furrr_options(seed = TRUE))
```
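`get_traitMat()` and `get_FD()` come from the biodivTS package and are not defined in this script. For orientation only, here is a rough sketch of what a `get_FD()`-style wrapper around `FD::dbFD()` could look like, mirroring the arguments used above; this is an assumed reimplementation, not the package's actual function, and the set of indices kept is illustrative.

```r
# rough sketch of a dbFD()-based wrapper (assumed; not the biodivTS implementation)
get_FD_sketch <- function(species_mat, trait_mat, year_list, data_id, samp_id,
                          w.abun = TRUE, m = "min", corr = "cailliez"){
  # dbFD() expects the columns of the abundance matrix in the same order as the
  # rows of the trait matrix, which is why the species columns are sorted above
  out <- FD::dbFD(x = trait_mat, a = as.matrix(species_mat),
                  w.abun = w.abun, m = m, corr = corr, messages = FALSE)
  # one row per community (i.e. per row of species_mat)
  fd_met <- data.frame(year = year_list,
                       nbsp = out$nbsp, sing.sp = out$sing.sp,
                       FRic = out$FRic, FEve = out$FEve, FDiv = out$FDiv,
                       FDis = out$FDis, RaoQ = out$RaoQ,
                       rarefyid = data_id, rarefy_resamp = samp_id)
  list(fd_met = fd_met)
}
```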
## Figure out which nulls still need to be calculated
```r
null_files <- dir(here::here("data", "null_samples"), "*.csv")
null_files <- as.data.frame(null_files) %>%
  mutate(rarefyid = str_match(null_files, "(.*?)\\s*_samp")[,2],
         rarefy_samp = str_match(null_files, "samp\\s*(.*?)\\s*.csv")[,2])

fd_files <- dir(here::here("data", "fd_null_output"), "*.tsv")
fd_files <- as.data.frame(fd_files) %>%
  mutate(rarefyid = str_match(fd_files, "null_rarefy\\s*(.*?)\\s*samp")[,2],
         rarefy_samp = str_match(fd_files, "samp\\s*(.*?)\\s*.tsv")[,2])

missing_files <- null_files %>%
  left_join(fd_files) %>%
  filter(is.na(fd_files)) %>%
  pull(null_files) %>%
  paste0(here::here("data", "null_samples/"), .)

get_nullplot_FD_safe <- possibly(get_nullplot_FD, otherwise = tibble())

plan(multisession, workers = 120)
null_FD <- future_map_dfr(missing_files, ~get_nullplot_FD_safe(.x, traits = trait_ref), .options = furrr_options(seed = TRUE))
```
## Create null table
```r
#get list of null fd sample files
path <- here::here("data", "fd_null_output")
null_files <- dir(path, "*.tsv") %>% paste0(path, "/", .)

#function to read in null fd samples and get info to calculate stats
process_nulls <- function(file_path, n){
  data <- read_tsv(file_path) %>%
    rename(richness = nbsp) %>%
    select(-sing.sp, -n) %>%
    group_by(year)

  means <- data %>%
    select(-rarefyid, -richness, -rarefy_resamp) %>%
    summarise(across(everything(), ~mean(.x, na.rm = TRUE))) %>%
    pivot_longer(cols = -year, names_to = "metric", values_to = "mean")

  sds <- data %>%
    select(-rarefyid, -richness, -rarefy_resamp) %>%
    summarise(across(everything(), ~sd(.x, na.rm = TRUE))) %>%
    pivot_longer(cols = -year, names_to = "metric", values_to = "sd")

  stats <- left_join(means, sds, by = c("year", "metric")) %>%
    left_join(data %>% select(year, rarefyid, rarefy_resamp, richness) %>% distinct(), by = "year") %>%
    mutate(se = sd/sqrt(n),
           lowerCI = mean - qt(0.975, n - 1) * se,
           upperCI = mean + qt(0.975, n - 1) * se) %>%
    select(year, rarefyid, rarefy_resamp, everything())

  return(stats)
}

plan(multisession, workers = 120)
null_table <- future_map_dfr(null_files, process_nulls, n = 500)

#write out null table
usethis::use_data(null_table)
```
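A typical downstream use of `null_table` is to standardize observed metrics against the null distribution, e.g. as a standardized effect size (SES). A hedged sketch, assuming an observed-metric table `obs_table` in long format with columns `rarefyid`, `rarefy_resamp`, `year`, `metric`, and `value` (that object is hypothetical and not created in this script):

```r
# SES = (observed - null mean) / null sd, matched by timeseries, resample, year, and metric
ses_table <- obs_table %>%
  left_join(null_table, by = c("rarefyid", "rarefy_resamp", "year", "metric")) %>%
  mutate(SES = (value - mean) / sd)
```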