# Chunk default for the report: echo = FALSE keeps chunk source code out of
# the knitted output.
knitr::opts_chunk$set(echo = FALSE)
# Packages attached for the code below.
library(drake)
library(dplyr)
library(ggplot2)
library(grid)
# Use theme_bw() as the default ggplot2 theme for plots made after this point.
theme_set(theme_bw())

# Read the two CSV inputs for this report from analysis/reports/submission2,
# resolved against the project root by here::here(). Strings are kept as
# character vectors. The flag is spelled FALSE rather than F because F is an
# ordinary variable that can be reassigned.
all_di <- read.csv(
  here::here("analysis", "reports", "submission2", "all_di.csv"),
  stringsAsFactors = FALSE
)

all_ct <- read.csv(
  here::here("analysis", "reports", "submission2", "all_ct.csv"),
  stringsAsFactors = FALSE
)


#fia_ct <- read.csv(here::here("fia_cts.csv"))

#all_ct <- rbind(all_ct, fia_ct)
# Normalise the dataset label in all_ct: any label containing "fia" becomes
# "fia", "misc_abund_short" becomes "misc_abund", and every other label is
# left unchanged.
all_ct <- all_ct %>%
  mutate(
    dat = ifelse(
      grepl("fia", dat),
      "fia",
      ifelse(dat == "misc_abund_short", "misc_abund", dat)
    )
  )

# Prepare all_di for the summaries below:
#   1. add natural-log versions of nparts, nsamples, s0 and n0;
#   2. drop rows where n0 equals s0 or s0 + 1, where s0 is 1, where the
#      `singletons` flag is set, or where nparts is not above 20;
#   3. collapse the dataset label (anything containing "fia" -> "fia",
#      "misc_abund_short" -> "misc_abund") and derive a display name in
#      `Dataset`, falling back to the raw label when no display name exists;
#   4. left-join all_ct on whichever columns the two tables have in common.
all_di <- all_di %>%
  mutate(
    log_nparts = log(gmp:::as.double.bigz(nparts)),
    log_nsamples = log(nsamples),
    log_s0 = log(s0),
    log_n0 = log(n0)
  ) %>%
  filter(
    !(n0 == s0 | n0 == (s0 + 1)),
    s0 != 1,
    !singletons,
    nparts > 20
  ) %>%
  mutate(
    dat = ifelse(
      grepl("fia", dat),
      "fia",
      ifelse(dat == "misc_abund_short", "misc_abund", dat)
    ),
    Dataset = case_when(
      dat == "fia" ~ "FIA",
      dat == "bbs" ~ "Breeding Bird Survey",
      dat == "mcdb" ~ "Mammal Communities",
      dat == "gentry" ~ "Gentry",
      dat == "misc_abund" ~ "Misc. Abundance",
      TRUE ~ dat
    )
  ) %>%
  left_join(all_ct)

# Average each metric's two percentile scores (the plain column and its
# `_excl` counterpart) row by row.
#
# The earlier version called mean(a, b) inside group_by_all(). mean()'s second
# positional argument is `trim`, not a second sample, so the `*_mean` columns
# were never an average of the two scores. Plain vectorised arithmetic gives
# the per-row mean and needs no grouping.
all_di <- all_di %>%
  # group_by_all() %>% ungroup() used to hand back a tibble; keep that class.
  as_tibble() %>%
  mutate(
    real_po_percentile_mean =
      (real_po_percentile + real_po_percentile_excl) / 2,
    skew_percentile_mean =
      (skew_percentile + skew_percentile_excl) / 2,
    simpson_percentile_mean =
      (simpson_percentile + simpson_percentile_excl) / 2,
    shannon_percentile_mean =
      (shannon_percentile + shannon_percentile_excl) / 2,
    nsingletons_percentile_mean =
      (nsingletons_percentile + nsingletons_percentile_excl) / 2
  )

# Two-level flag separating FIA rows from rows of every other dataset.
all_di <- all_di %>%
  mutate(
    in_fia = ifelse(Dataset != "FIA", "Other datasets", "FIA")
  )
# Per dataset: the fraction of rows (nparts > 20) whose
# real_po_percentile_excl is above 95, plus a display label of the form
# "<percent>%; n = <row count>".
diss_results <- all_di %>%
  filter(nparts > 20) %>%
  group_by(Dataset) %>%
  summarize(
    high_diss = mean(real_po_percentile_excl > 95),
    `High dissimilarity` = paste0(
      signif(100 * high_diss, 2), "%; n = ", n()
    )
  ) %>%
  ungroup()

# Per dataset, restricted to rows with nparts > 40 and s0 > 2: the fraction
# with skew_percentile_excl above 97.5 and the fraction with skew_percentile
# below 2.5, each with a display label "<percent>%; n = <row count>".
skew_results <- all_di %>%
  filter(nparts > 40 & s0 > 2) %>%
  group_by(Dataset) %>%
  summarize(
    high_skew = mean(skew_percentile_excl > 97.5),
    low_skew = mean(skew_percentile < 2.5),
    `High skew` = paste0(signif(100 * high_skew, 2), "%; n = ", n()),
    `Low skew` = paste0(signif(100 * low_skew, 2), "%; n = ", n())
  ) %>%
  ungroup()

# Per dataset, restricted to rows with nparts > 40: for the Simpson, Shannon
# and nsingletons percentile scores, the fraction of rows in the upper tail
# (`*_excl` score > 97.5) and in the lower tail (plain score < 2.5). Each
# fraction also gets a display label "<percent>%; n = <row count>".
metric_results <- all_di %>%
  filter(nparts > 40) %>%
  group_by(Dataset) %>%
  summarize(
    high_simpson = mean(simpson_percentile_excl > 97.5),
    low_simpson = mean(simpson_percentile < 2.5),
    high_shannon = mean(shannon_percentile_excl > 97.5),
    low_shannon = mean(shannon_percentile < 2.5),
    high_nsingletons = mean(nsingletons_percentile_excl > 97.5),
    low_nsingletons = mean(nsingletons_percentile < 2.5),
    `High Simpson` = paste0(
      signif(100 * high_simpson, 2), "%; n = ", n()
    ),
    `Low Simpson` = paste0(
      signif(100 * low_simpson, 2), "%; n = ", n()
    ),
    `High Shannon` = paste0(
      signif(100 * high_shannon, 2), "%; n = ", n()
    ),
    `Low Shannon` = paste0(
      signif(100 * low_shannon, 2), "%; n = ", n()
    ),
    `High proportion of rare species` = paste0(
      signif(100 * high_nsingletons, 2), "%; n = ", n()
    ),
    `Low proportion of rare species` = paste0(
      signif(100 * low_nsingletons, 2), "%; n = ", n()
    )
  ) %>%
  ungroup()

# Combine the three per-dataset summaries into one table, keeping every
# Dataset present in diss_results. Dataset is the only column the summaries
# have in common, so it is named as the join key explicitly: the result is
# the same as the implicit natural join, without the "Joining, by = ..."
# message and without the risk of picking up extra keys if columns are added.
all_results <- diss_results %>%
  left_join(skew_results, by = "Dataset") %>%
  left_join(metric_results, by = "Dataset")
# 
# cols1 <- c("Dataset", "High dissimilarity", "High proportion of rare species", "High skew", "Low Simpson", "Low Shannon")
# 
# all_results %>%
#   select(cols1)


# Columns shown in the table printed below: the dataset name plus the
# "low rare species", "low skew", "high Simpson" and "high Shannon" labels.
cols2 <- c("Dataset", "Low proportion of rare species", "Low skew", "High Simpson", "High Shannon")

# all_of() marks cols2 as an external character vector of column names, so
# select() cannot confuse it with a column called `cols2`, and a missing
# column raises an error. Passing the bare vector is deprecated in tidyselect.
all_results %>%
  select(all_of(cols2))

Table S5. Proportions of extreme percentile scores for observed SADs compared to samples from the feasible set, for the less-usual direction of effects. These are the proportions of scores <2.5 or >97.5; by chance, ~2.5% of scores are expected to fall in each extreme. n refers to the number of communities included for each dataset for each metric.



diazrenata/scadsanalysis documentation built on May 14, 2021, 6:59 p.m.