knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
#library(biodivTS)
library(FD)
library(furrr)

pins::board_register_github(repo = "karinorman/biodivTS_data", branch = "master")

Create null models that would be transferable to any timeseries by treating the species pool as all species observed across the timeseries.

Simulation functions

get_plot_sample <- function(data, filter_plot, n){
  print(filter_plot)
  plot_data <- data %>% filter(plot_id == filter_plot) %>%
    select(year, id, weight)

  species_pool <- plot_data %>%
    pull(id) %>%
    na.omit() %>%
    unique() #get column of unique scientific names

  get_samp_species <- function(data){
    samp_species <- sample(species_pool, n_distinct(data$id))
    samp_abun <- sample(data$weight)

    df <- as_tibble(cbind(samp_species, samp_abun)) %>% mutate(year = unique(data$year))
    return(df)
  }

  site_samples <- data.frame()
  for (j in 1:n){
    iter_samples <- data.frame()
    for(i in unique(plot_data$year)){
      year_data <- plot_data %>% filter(year == i)
      iter_samples <- rbind(iter_samples, get_samp_species(year_data))
    }
    site_samples <- rbind(site_samples, iter_samples %>% mutate(n = j))
  }
  plot_samples <- site_samples %>%
    mutate(plot_id = filter_plot) %>%
    pivot_wider(names_from = samp_species, values_from = samp_abun)

  save_dir <- here::here("data", "null_samples")
  fs::dir_create(save_dir)
  write.csv(plot_samples, paste0(save_dir, "/", filter_plot, "_samps.csv"))

  return(plot_samples)
}

Get dataframe of samples for each plot and corresponding species and trait matrices

biotime_traits <- pins::pin_get("bt-traitfiltered", board = "github")

n = 100

trait_data <- biotime_traits %>% 
  select(-c(biotimeName, taxa, id_species, plot, year, weight, 
            traitName, scientificName, plot_id, study_id)) %>%
  distinct()

rm(biotime_traits)

Get sample dataframe

future::plan(multiprocess)
sample_occurrence <- future_map(unique(biotime_traits$rarefyid), get_plot_sample, data = biotime_traits, n = n) %>%
  bind_rows() %>%
  replace(is.na(.), 0)
get_nullplot_FD <- function(file_path, traits, ...) {

  plot_samples <- read_csv(file_path, col_types = cols(plot_id = "c")) %>% mutate(plot_id = as.character(plot_id)) %>% replace(is.na(.), 0)

  sample_species_mat <- plot_samples %>%
    select(-c(X1, year, plot_id, n)) %>% #want only species x site info
    select(sort(tidyselect::peek_vars())) #sort columns to be in alphabetical order, required for dbFDI()

  trait_mat <- traits %>%
    filter(id %in% colnames(sample_species_mat)) %>%
    select_if(~ length(unique(na.omit(.))) > 1)  #remove columns that have the same value for all columns

  #get binary variables so they can be excluded from rescaling 
  bin_vars <- map(trait_mat, ~ all(na.omit(.) %in% 0:1))

  trait_mat <- trait_mat %>%
    mutate_at(vars(-c(id, names(bin_vars[bin_vars == TRUE])), -ends_with("5cat")), 
              list(~as.numeric(scale(.)))) %>% #rescale variables, not binary or categorical
    arrange(id) %>%
    column_to_rownames("id")

  save_dir <- here::here("data", "fd_null_output")
  fs::dir_create(save_dir)

  #the convexhull c code creates an output file of vertices, in order to not have multiple threads writing
  #to the same file we have to create individual working directories
  tmp_dir <- paste0(save_dir, "/tmp", "/tmp_", unique(plot_samples$plot_id))
  fs::dir_create(tmp_dir)
  setwd(tmp_dir)

  print(unique(plot_samples$plot_id))
  sample_fd <- as.data.frame(dbFD_mine(x = trait_mat, a = sample_species_mat, w.abun = TRUE, ...)$fd_met) %>%
    cbind(plot_samples %>% select(year, plot_id, n))

  setwd(here::here())
  write_tsv(sample_fd, paste0(save_dir, "/null", unique(plot_samples$plot_id), ".tsv"))

  return(sample_fd)
}

files <-  dir(here::here("data", "null_samples"), "*.csv") %>%
  paste0(here::here("data", "null_samples"), "/", .)

plan(multiprocess)
null_FD <- future_map_dfr(files, get_nullplot_FD, traits = trait_data,
                   m = "min", corr = "cailliez")

Create Null table

path <- here::here("data", "fd_null_output")
null_files <-  dir(path, "*.tsv") %>%
  paste0(path, "/", .)

process_nulls <- function(file_path, n){
  data <- read_tsv(file_path, col_types = cols(plot_id = "c")) %>%
    select(year, plot_id, richness = nbsp, everything(), -sing.sp, -starts_with("CWM"), -n) %>%
    group_by(year)

  means <- data %>%
    select(-plot_id, -richness) %>%
    summarise(across(everything(), mean)) %>%
    pivot_longer(cols = -year, names_to = "metric", values_to = "mean")

  sds <- data %>%
    select(-plot_id, -richness) %>%
    summarise(across(everything(), sd)) %>%
    pivot_longer(cols = -year, names_to = "metric", values_to = "sd")

  stats <- left_join(means, sds, by = c("year", "metric")) %>%
    left_join(data %>% select(year, plot_id, richness) %>% distinct(), by = "year") %>%
    mutate(se = sd/sqrt(n),
           lowerCI = mean - qt(0.975, n - 1) * se, 
           upperCI = mean + qt(0.975, n - 1) * se)  %>%
    select(year, plot_id, everything())

  return(stats)
}

plan(multiprocess)
null_table <- future_map_dfr(null_files, process_nulls, n = n)

usethis::use_data(null_table)
pins::pin(null_table, board = "github")


karinorman/biodivTS documentation built on Dec. 23, 2024, 10 p.m.