# Setup chunk: knitr options, packages, the default ggplot theme, and the
# `all_di` / `all_ct` tables that the plotting code below works from.
knitr::opts_chunk$set(echo = FALSE)

library(drake)
library(dplyr)
library(ggplot2)
library(grid)

theme_set(theme_bw())

all_di <- read.csv(
  here::here("analysis", "reports", "submission2", "all_di.csv"),
  stringsAsFactors = FALSE
)
all_ct <- read.csv(
  here::here("analysis", "reports", "submission2", "all_ct.csv"),
  stringsAsFactors = FALSE
)

#fia_ct <- read.csv(here::here("fia_cts.csv"))
#all_ct <- rbind(all_ct, fia_ct)

# Collapse every label containing "fia" to "fia", and fold
# "misc_abund_short" into "misc_abund".
all_ct <- all_ct %>%
  mutate(
    dat = ifelse(grepl(dat, pattern = "fia"), "fia", dat),
    dat = ifelse(dat == "misc_abund_short", "misc_abund", dat)
  )

all_di <- all_di %>%
  # Log-scale versions of the size variables.
  mutate(
    log_nparts = log(gmp:::as.double.bigz(nparts)),
    log_nsamples = log(nsamples),
    log_s0 = log(s0),
    log_n0 = log(n0)
  ) %>%
  # Drop rows where n0 == s0, s0 == 1, `singletons` is TRUE, or n0 == s0 + 1.
  filter(n0 != s0, s0 != 1, !singletons, n0 != (s0 + 1)) %>%
  # Same label clean-up as applied to `all_ct` above.
  mutate(
    dat = ifelse(grepl(dat, pattern = "fia"), "fia", dat),
    dat = ifelse(dat == "misc_abund_short", "misc_abund", dat)
  ) %>%
  # Human-readable dataset names used for facets and legends.
  mutate(
    Dataset = dat,
    Dataset = ifelse(Dataset == "fia", "FIA", Dataset),
    Dataset = ifelse(Dataset == "bbs", "Breeding Bird Survey", Dataset),
    Dataset = ifelse(Dataset == "mcdb", "Mammal Communities", Dataset),
    Dataset = ifelse(Dataset == "gentry", "Gentry", Dataset),
    Dataset = ifelse(Dataset == "misc_abund", "Misc. Abundance", Dataset)
  ) %>%
  filter(nparts > 20) %>%
  # No `by` is given, so dplyr joins on all column names the tables share.
  left_join(all_ct)

# Row-wise average of each percentile and its `_excl` counterpart.
#
# This used to be written as `mean(a, b)` inside `group_by_all()`. The second
# positional argument of `mean()` is `trim`, not a second value to average, so
# each result was effectively just `a`. Plain vector arithmetic computes the
# intended per-row mean and needs no grouping.
all_di <- all_di %>%
  # The previous group_by_all()/ungroup() round trip returned a tibble; keep
  # that class for downstream code.
  as_tibble() %>%
  mutate(
    real_po_percentile_mean =
      (real_po_percentile + real_po_percentile_excl) / 2,
    skew_percentile_mean =
      (skew_percentile + skew_percentile_excl) / 2,
    simpson_percentile_mean =
      (simpson_percentile + simpson_percentile_excl) / 2,
    shannon_percentile_mean =
      (shannon_percentile + shannon_percentile_excl) / 2,
    nsingletons_percentile_mean =
      (nsingletons_percentile + nsingletons_percentile_excl) / 2
  )

# Two-level indicator: FIA versus everything else.
all_di <- all_di %>%
  mutate(in_fia = ifelse(Dataset == "FIA", "FIA", "Other datasets"))
# Scatter plot of one metric against `nparts` (log10 x axis), coloured by
# `Dataset`.
#
# di_df:     data frame with columns `nparts`, `dat`, `s0`, `Dataset`, and the
#            column named by `yvar`.
# yvar:      name (string) of the column plotted on the y axis. Names
#            containing "sim_pos" get the dissimilarity label and threshold.
# yvar_name: plot title.
#
# Returns a ggplot object with the legend hidden.
plot_narrowness <- function(di_df, yvar, yvar_name) {
  is_dissimilarity <- grepl("sim_pos", yvar)

  # NOTE(review): plot_narrowness_hist() pairs "sim_pos" columns with 40 and
  # all other columns with 20, the reverse of this -- confirm which is intended.
  min_nparts <- if (is_dissimilarity) 20 else 40
  ylabel <- if (is_dissimilarity) "Mean dissimilarity" else "Breadth index"

  di_df <- di_df %>%
    mutate(response = di_df[[yvar]]) %>%
    filter(nparts > min_nparts, nparts < 10^50)

  # Layers are added FIA first, then the other datasets, so the latter are
  # drawn on top.
  fia_rows <- filter(di_df, dat == "fia")
  other_rows <- filter(di_df, dat != "fia")
  negative_s0_rows <- filter(di_df, s0 < 0)

  ggplot(di_df, aes(x = nparts, y = response, color = Dataset)) +
    geom_point(data = fia_rows, alpha = .1) +
    geom_point(data = other_rows, alpha = .1) +
    geom_point(data = negative_s0_rows) +
    scale_color_viridis_d(end = .9) +
    scale_x_log10() +
    labs(x = "", y = ylabel) +
    ggtitle(yvar_name) +
    theme(
      legend.position = "none",
      plot.title = element_text(size = 10)
    )
}

# Stand-alone "legend" panel: one coloured point and one text label per
# distinct value of `Dataset`, stacked vertically, with grid lines, axis text,
# and axis ticks removed.
#
# di_df: data frame with a `Dataset` column.
#
# Returns a ggplot object.
plot_narrowness_legend <- function(di_df) {
  legend_df <- di_df %>%
    distinct(Dataset) %>%
    mutate(ymark = dplyr::row_number())

  blank <- element_blank()

  ggplot(legend_df, aes(x = 1, y = ymark, color = Dataset)) +
    geom_label(aes(x = 2, y = ymark, label = Dataset)) +
    geom_point(size = 4) +
    scale_color_viridis_d(end = .9) +
    labs(x = "", y = "") +
    xlim(1, 3) +
    theme(
      legend.position = "none",
      panel.grid.major.x = blank,
      panel.grid.minor.x = blank,
      panel.grid.major.y = blank,
      panel.grid.minor.y = blank,
      axis.text.x = blank,
      axis.ticks.x = blank,
      axis.text.y = blank,
      axis.ticks.y = blank
    ) +
    ggtitle("Dataset")
}

#
# gridExtra::grid.arrange(grobs = list(
#   plot_narrowness(all_di, "sim_pos_from_best", "Dissimilarity to central tendency"),
#   plot_narrowness_legend(all_di)),
#   ncol = 1)
# Histogram (40 bins) of one metric, with one facet row per level of a
# grouping column and free y scales.
#
# di_df:     data frame with columns `nparts`, `s0`, the column named by
#            `col_name`, and the column named by `facetvar`.
# col_name:  name (string) of the column to histogram. Names containing
#            "sim_pos" get the dissimilarity x label and the higher `nparts`
#            threshold.
# plot_name: plot title.
# facetvar:  name (string) of the column to facet by.
# min_s0:    rows with `s0` below this value are dropped before plotting.
#
# Returns a ggplot object.
plot_narrowness_hist <- function(di_df, col_name, plot_name,
                                 facetvar = "Dataset", min_s0 = 0) {
  is_dissimilarity <- grepl("sim_pos", col_name)
  min_nparts <- if (is_dissimilarity) 40 else 20
  xvarname <- if (is_dissimilarity) {
    "95th percentile \nof dissimilarity scores"
  } else {
    "Breadth index \n"
  }

  # Copy the requested columns to fixed names so the aesthetics and the facet
  # specification can refer to them directly.
  di_df <- di_df %>%
    mutate(
      response = di_df[[col_name]],
      facetvar = di_df[[facetvar]]
    )

  plot_df <- filter(di_df, nparts > min_nparts, s0 >= min_s0)

  ggplot(plot_df, aes(x = response)) +
    geom_histogram(bins = 40, boundary = 100) +
    theme_bw() +
    labs(x = xvarname, y = "") +
    ggtitle(plot_name) +
    facet_wrap(vars(facetvar), ncol = 1, scales = "free_y") +
    theme(plot.title = element_text(size = 10))
}
# Assemble the five per-dataset histogram panels side by side and keep the
# arranged grob in `fig_2`.
fig_2 <- local({
  # One entry per panel, left to right: column to plot, panel title, and the
  # minimum `s0` passed to plot_narrowness_hist().
  panel_specs <- list(
    list(
      col_name = "sim_pos_from_best_95",
      plot_name = "Dissimilarity to the \ncentral tendency",
      min_s0 = 0
    ),
    list(
      col_name = "nsingletons_95_ratio_2t",
      plot_name = "\nNumber of rare species",
      min_s0 = 0
    ),
    list(
      col_name = "skew_95_ratio_2t",
      plot_name = "\nSkewness",
      min_s0 = 3
    ),
    list(
      col_name = "simpson_95_ratio_2t",
      plot_name = "\nSimpson evenness",
      min_s0 = 0
    ),
    list(
      col_name = "shannon_95_ratio_2t",
      plot_name = "\nShannon diversity",
      min_s0 = 0
    )
  )

  panels <- lapply(panel_specs, function(spec) {
    plot_narrowness_hist(
      all_di,
      spec[["col_name"]],
      spec[["plot_name"]],
      facetvar = "Dataset",
      min_s0 = spec[["min_s0"]]
    )
  })

  # NOTE(review): `fill` is not the text colour setting in gpar() (that is
  # `col`), and the trailing `gp` is not a documented grid.arrange() argument.
  # Both are kept as written -- confirm whether they are intended to have an
  # effect.
  gridExtra::grid.arrange(
    grobs = panels,
    ncol = 5,
    left = textGrob(
      "Number of communities",
      rot = 90,
      gp = gpar(fill = "black")
    ),
    gp = gpar(fill = "white")
  )
})
Figure S6. Partly because S and N are unevenly distributed among the datasets, the narrowness of the feasible sets varies among datasets. Narrowness is defined either as the 95th percentile of scores for the dissimilarity of samples from the feasible set compared to the central tendency of the feasible set, or using a breadth index for specific metrics. In particular, the FIA dataset, and subsets of the Mammal Community and Miscellaneous Abundance databases, often have highly variable, broadly defined statistical baselines derived from the feasible set.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.