This script reads in all the rarefacation sample files, gets the median value for each rarefyID and year, and then merges with null model statistics.

library(dplyr)
library(tibble)
library(stringr)
library(furrr)
library(tidyr)

Read in files

##  load species based metrics
path <- here::here("data", "rarefied_metrics")
filelist <-  dir(path, "*.rda") %>%
  paste0(path, "/", .)

species_metrics <- purrr::map_dfr(filelist, function(x) {load(x)
  return(rarefied_metrics)
    }) %>% distinct()

## load fd based metrics
path <- here::here("data", "rarefied_metrics", "fd")
filelist <-  dir(path, "*.rda") %>%
  paste0(path, "/", .)

#read in metric data and do some clean up before combining
fd_metrics <- purrr::map_dfr(filelist, function(x) {
    load(x)
    rarefyID <- str_match(x,"_cell\\s*(.*?)\\s*_sample")[,2]

    biochange_metrics <- biochange_metrics %>%
      mutate(rarefyID = rarefyID)
    return(biochange_metrics)
  }) %>% distinct()

#read in CWM metrics
path <- here::here("data", "rarefied_metrics", "cwm_noscale")
filelist <-  dir(path, "*.rda") %>%
  paste0(path, "/", .)

cwm_metrics <- purrr::map_dfr(filelist, function(x) {
  #browser()
  load(x)
  rarefyID <- str_match(x,"_cell\\s*(.*?)\\s*_sample")[,2]

  if(dim(cwm_metrics)[1] == 0){
    return(data.frame())
  }else{

    cwm_metrics <- cwm_metrics %>%
      unnest(CWM) %>%
      mutate(YEAR = as.integer(YEAR), rarefyID = rarefyID)

    return(cwm_metrics)
  }
})

## get null models
load(here::here("data/null_table.rda"))

Merge rarefied species and fd metrics and null models

rarefied_metrics <- full_join(species_metrics, fd_metrics %>% select(-CWM),
                              c("YEAR", "cell", "rarefy_resamp", "rarefyID")) %>%
  full_join(cwm_metrics %>% rename_with(~paste0("CWM_", .), -c("YEAR", "cell", "rarefy_resamp", "rarefyID")), 
            c("YEAR", "cell", "rarefy_resamp", "rarefyID")) %>%
  filter(S != 1) %>% # remove samples that had one species as they result in strange metric values
  select(-qual.FRic)


##  pull out new metadata
new_meta <- rarefied_metrics %>%
    distinct(rarefyID, SamplePool, SampleN, num_years, duration, startYear, endYear)

##  calculate the medians for all the metrics for studies that were rarefied
## exclude categorical variables - they don't make sense as medians
metric_cols <- ungroup(rarefied_metrics) %>%
  select(-c(SamplePool, SampleN, num_years, duration, startYear, endYear, type,
            CWM_diet_5cat, CWM_pelagicspecialist, CWM_diurnal, CWM_nocturnal, 
            CWM_crepuscular, CWM_dir, CWM_lar)) %>%
  tidyr::pivot_longer(-c(rarefyID, rarefied, YEAR, cell, rarefy_resamp), names_to = "metric") %>%
  left_join(null_table %>% select(-c(richness, se, lowerCI, upperCI)),
            by = c("YEAR" = "year", "rarefyID" = "rarefyid", "rarefy_resamp" = "rarefy_resamp", "metric" = "metric"))

# get a dataframe of rarefyID's and rarefication samples that we don't have null_model for, and a count of the number of resamples that are missing
no_null <- metric_cols %>%
  filter(metric %in% c("FRic", "FEve", "FDiv", "FDis"), is.na(mean)) %>%
  select(rarefyID, rarefy_resamp) %>%
  distinct()

missing_nulls <- null_table %>%
  right_join(no_null, by = c("rarefyid" = "rarefyID","rarefy_resamp")) %>%
  group_by(rarefyid) %>%
  summarise(n_missing_nulls = n_distinct(rarefy_resamp)) %>%
  mutate(commplete_null_samps = "FALSE")

# calculate SES and incorporate into metric columns
metric_cols <- metric_cols %>%
  mutate(SES = (value - mean)/sd) %>%
  #for years where species richness = richness of the species pool, FRic sd = 0, so use the original value with no SES correction
  mutate(SES = case_when(
    sd == 0 ~ 0,
    TRUE ~ SES)) %>%
  select(-c(mean, sd)) %>%
  #the next three lines go back to wide format for both SES and unadjusted values, then to long format incorporating SES values
  tidyr::pivot_wider(names_from = "metric", values_from = c("value", "SES")) %>%
  rename_at(vars(starts_with("value")), ~str_replace(., "value_", "")) %>%
  tidyr::pivot_longer(-c(rarefyID, rarefied, YEAR, cell, rarefy_resamp), names_to = "metric") %>%
  filter(!is.na(value))


rarefied_medians <- metric_cols %>%
  group_by(rarefyID, YEAR, cell, rarefied, metric) %>%
  summarise(value = median(value, na.rm = TRUE))

##  recombine with new metadata
rarefied_metrics <- inner_join(new_meta, rarefied_medians, by='rarefyID') %>%
  left_join(missing_nulls, by = c("rarefyID" = "rarefyid"))
  #inner_join(rarefied_ints)

##  save
usethis::use_data(rarefied_metrics)


karinorman/biodivTS documentation built on Dec. 23, 2024, 10 p.m.