# Setup chunk: load packages, read the per-community results (`all_di`) and
# the companion table (`all_ct`), harmonise dataset identifiers, drop
# communities that are excluded from the analysis, and derive helper columns.
knitr::opts_chunk$set(echo = FALSE)
library(drake)
library(dplyr)
library(ggplot2)
library(grid)
theme_set(theme_bw())

# Collapse dataset identifiers: every identifier containing "fia" becomes
# "fia", and "misc_abund_short" is folded into "misc_abund". Other values
# (including NA) pass through unchanged.
normalize_dat <- function(dat) {
  dat[grepl("fia", dat)] <- "fia"
  dat[dat %in% "misc_abund_short"] <- "misc_abund"
  dat
}

all_di <- read.csv(
  here::here("analysis", "reports", "submission2", "all_di.csv"),
  stringsAsFactors = FALSE
)
all_ct <- read.csv(
  here::here("analysis", "reports", "submission2", "all_ct.csv"),
  stringsAsFactors = FALSE
)

#fia_ct <- read.csv(here::here("fia_cts.csv"))
#all_ct <- rbind(all_ct, fia_ct)

all_ct <- all_ct %>%
  mutate(dat = normalize_dat(dat))

all_di <- all_di %>%
  mutate(
    # NOTE(review): `gmp:::as.double.bigz` is an unexported method; kept as-is
    # because the stored type of `nparts` is not visible from this chunk.
    log_nparts = log(gmp:::as.double.bigz(nparts)),
    log_nsamples = log(nsamples),
    log_s0 = log(s0),
    log_n0 = log(n0)
  ) %>%
  # Drop communities where n0 == s0, s0 == 1, n0 == s0 + 1, or that are
  # flagged in the `singletons` column.
  filter(n0 != s0, s0 != 1, !singletons, n0 != (s0 + 1)) %>%
  mutate(dat = normalize_dat(dat)) %>%
  # Human-readable dataset labels for tables and figures; identifiers without
  # a label below are carried over unchanged.
  mutate(
    Dataset = case_when(
      dat == "fia" ~ "FIA",
      dat == "bbs" ~ "Breeding Bird Survey",
      dat == "mcdb" ~ "Mammal Communities",
      dat == "gentry" ~ "Gentry",
      dat == "misc_abund" ~ "Misc. Abundance",
      TRUE ~ dat
    )
  ) %>%
  filter(nparts > 20) %>%
  # Natural join on whatever columns the two tables share.
  left_join(all_ct)

# Row-wise average of each percentile score and its `_excl` counterpart.
# These must be computed with plain arithmetic: `mean(a, b)` passes `b` as
# the `trim` argument of `mean()`, which (on the one-row groups used
# previously) simply returned `a` instead of the average of the two columns.
all_di <- all_di %>%
  mutate(
    real_po_percentile_mean =
      (real_po_percentile + real_po_percentile_excl) / 2,
    skew_percentile_mean =
      (skew_percentile + skew_percentile_excl) / 2,
    simpson_percentile_mean =
      (simpson_percentile + simpson_percentile_excl) / 2,
    shannon_percentile_mean =
      (shannon_percentile + shannon_percentile_excl) / 2,
    nsingletons_percentile_mean =
      (nsingletons_percentile + nsingletons_percentile_excl) / 2
  ) %>%
  # Keep the result a tibble, as the earlier group/ungroup round trip did.
  as_tibble()

all_di <- all_di %>%
  mutate(in_fia = ifelse(Dataset == "FIA", "FIA", "Other datasets"))
# Results chunk: for each dataset, compute the share of communities whose
# percentile scores fall in the extremes, and assemble the summary table.

# Format a proportion and its sample size as "<percent>%; n = <n>", with the
# percentage rounded to two significant figures.
format_extreme <- function(prop, n) {
  paste0(signif(100 * prop, 2), "%; n = ", n)
}

# Dissimilarity: one-tailed, share of scores above the 95th percentile.
diss_results <- all_di %>%
  filter(nparts > 20) %>%
  group_by(Dataset) %>%
  summarize(
    high_diss = mean(real_po_percentile_excl > 95),
    nsites = dplyr::n()
  ) %>%
  ungroup() %>%
  mutate(`High dissimilarity` = format_extreme(high_diss, nsites)) %>%
  select(-nsites)

# Skewness: two-tailed (2.5 / 97.5), restricted to nparts > 40 and s0 > 2.
# The high tail uses the `_excl` percentile and the low tail the plain one.
skew_results <- all_di %>%
  filter(nparts > 40, s0 > 2) %>%
  group_by(Dataset) %>%
  summarize(
    high_skew = mean(skew_percentile_excl > 97.5),
    low_skew = mean(skew_percentile < 2.5),
    nsites = dplyr::n()
  ) %>%
  ungroup() %>%
  mutate(
    `High skew` = format_extreme(high_skew, nsites),
    `Low skew` = format_extreme(low_skew, nsites)
  ) %>%
  select(-nsites)

# Simpson, Shannon and number of singletons: two-tailed, nparts > 40.
metric_results <- all_di %>%
  filter(nparts > 40) %>%
  group_by(Dataset) %>%
  summarize(
    high_simpson = mean(simpson_percentile_excl > 97.5),
    low_simpson = mean(simpson_percentile < 2.5),
    high_shannon = mean(shannon_percentile_excl > 97.5),
    low_shannon = mean(shannon_percentile < 2.5),
    high_nsingletons = mean(nsingletons_percentile_excl > 97.5),
    low_nsingletons = mean(nsingletons_percentile < 2.5),
    nsites = dplyr::n()
  ) %>%
  ungroup() %>%
  mutate(
    `High Simpson` = format_extreme(high_simpson, nsites),
    `Low Simpson` = format_extreme(low_simpson, nsites),
    `High Shannon` = format_extreme(high_shannon, nsites),
    `Low Shannon` = format_extreme(low_shannon, nsites),
    `High proportion of rare species` =
      format_extreme(high_nsingletons, nsites),
    `Low proportion of rare species` =
      format_extreme(low_nsingletons, nsites)
  ) %>%
  select(-nsites)

# `Dataset` is the only column the three summaries have in common, so name it
# explicitly as the join key.
all_results <- diss_results %>%
  left_join(skew_results, by = "Dataset") %>%
  left_join(metric_results, by = "Dataset")

cols1 <- c("Dataset", "High dissimilarity", "High proportion of rare species", "High skew", "Low Simpson", "Low Shannon")

# `all_of()` marks `cols1` as an external character vector of column names
# rather than a column of `all_results`.
all_results %>%
  select(all_of(cols1))

#
# cols2 <- c("Dataset", "Low proportion of rare species", "Low skew", "High Simpson", "High Shannon")
#
# all_results %>%
#   select(cols2)
Table 1. Proportions of extreme percentile scores for observed SADs compared to samples from the feasible set. For dissimilarity, this is the proportion of percentile scores >95; by chance, ~5% of scores should be in this extreme. For all other metrics, this is the proportion <2.5 or >97.5; by chance, ~2.5% of scores should be in each extreme. n refers to the number of communities included for each dataset for each metric. The proportions shown are for the directions of effects observed for most datasets; for the opposite-direction effects, see Table S5.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.