We compare the results for very small communities (fewer than 2000 possible SADs in the feasible set) to larger communities within each dataset.
knitr::opts_chunk$set(echo = FALSE) library(drake) library(dplyr) library(ggplot2) library(grid) theme_set(theme_bw()) all_di <- read.csv(here::here("analysis", "reports", "submission2", "all_di.csv"), stringsAsFactors = F) all_ct <- read.csv(here::here("analysis", "reports", "submission2", "all_ct.csv"), stringsAsFactors = F) #fia_ct <- read.csv(here::here("fia_cts.csv")) #all_ct <- rbind(all_ct, fia_ct) all_ct <- all_ct %>% mutate(dat = ifelse(grepl(dat, pattern = "fia"), "fia", dat), dat = ifelse(dat == "misc_abund_short", "misc_abund", dat)) all_di <- all_di %>% mutate(log_nparts = log(gmp:::as.double.bigz(nparts)), log_nsamples = log(nsamples), log_s0 = log(s0), log_n0 = log(n0)) %>% filter(n0 != s0, s0 != 1, !singletons, n0 != (s0 + 1)) %>% mutate(dat = ifelse(grepl(dat, pattern = "fia"), "fia", dat), dat = ifelse(dat == "misc_abund_short", "misc_abund", dat)) %>% mutate(Dataset = dat, Dataset = ifelse(Dataset == "fia", "FIA", Dataset), Dataset = ifelse(Dataset == "bbs", "Breeding Bird Survey", Dataset), Dataset = ifelse(Dataset == "mcdb", "Mammal Communities", Dataset), Dataset = ifelse(Dataset == "gentry", "Gentry", Dataset), Dataset = ifelse(Dataset == "misc_abund", "Misc. Abundance", Dataset)) %>% filter(nparts > 20) %>% left_join(all_ct) all_di <- all_di %>% group_by_all() %>% mutate(real_po_percentile_mean = mean(real_po_percentile, real_po_percentile_excl), skew_percentile_mean = mean(skew_percentile, skew_percentile_excl), simpson_percentile_mean = mean(simpson_percentile,simpson_percentile_excl), shannon_percentile_mean = mean(shannon_percentile, shannon_percentile_excl), nsingletons_percentile_mean = mean(nsingletons_percentile, nsingletons_percentile_excl),) %>% ungroup() all_di <- all_di %>% mutate(in_fia = ifelse(Dataset == "FIA", "FIA", "Other datasets"))
plot_percentile_hist <- function(di_df, col_name, plot_name, tails = 2, facetvar = "Dataset", min_s0 = 0) { if(tails == 2) { cutoff_percentiles = c(2.5, 97.5) min_nparts = 40 } else if (tails== 1) { cutoff_percentiles = c(95) min_nparts = 20 } di_df <- di_df %>% mutate(response = (di_df[[col_name]]), facetvar = di_df[[facetvar]]) ggplot(filter(di_df, nparts > min_nparts, s0 >= min_s0), aes(response)) + geom_histogram(bins = 40, boundary = 100) + theme_bw() + xlab("") + ylab("") + geom_vline(xintercept = cutoff_percentiles, color = "red") + ggtitle( plot_name) + facet_wrap(vars(facetvar), ncol = 1, scales = "free_y")+ theme(plot.title = element_text(size=10)) } all_di <- all_di %>% mutate(`Observed \npercentile score` = ifelse(real_po_percentile_excl > 95, "High (> 95)", "Less than 95"))
plot_narrowness <- function(di_df, yvar, yvar_name) { if(grepl("sim_pos", yvar)) { min_nparts = 20 ylabel = "Mean dissimilarity" } else { min_nparts = 40 ylabel = "Breadth index" } di_df <- di_df %>% mutate(response = (di_df[[yvar]])) %>% filter(nparts > min_nparts, nparts < 10 ^ 50) ggplot(di_df, aes(nparts, response, color = Dataset)) + geom_point(data = filter(di_df, dat == "fia"), alpha = .1) + geom_point(data = filter(di_df, dat != "fia"), alpha = .1) + geom_point(data = filter(di_df, s0 < 0)) + scale_color_viridis_d(end = .9) + xlab("") + ylab(ylabel) + scale_x_log10() + ggtitle(yvar_name) + theme(legend.position = "none")+ theme(plot.title = element_text(size=10)) } plot_narrowness_legend <- function(di_df) { legend_df <- di_df %>% select(Dataset) %>% distinct() %>% mutate(ymark = dplyr::row_number()) ggplot(legend_df, aes(1, ymark, color = Dataset)) + geom_label(aes(x = 2, y = ymark, label = Dataset)) + geom_point(size = 4) + scale_color_viridis_d(end = .9) + xlab("") + ylab("") + theme(legend.position = "none") + xlim(1, 3) + theme( panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), panel.grid.major.y = element_blank(), panel.grid.minor.y = element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank() ) + ggtitle("Dataset") } # # gridExtra::grid.arrange(grobs = list( # plot_narrowness(all_di, "sim_pos_from_best", "Dissimilarity to central tendency"), # plot_narrowness_legend(all_di)), # ncol = 1)
plot_narrowness_hist <- function(di_df, col_name, plot_name, facetvar = "Dataset", min_s0 = 0) { if(grepl("sim_pos", col_name)) { xvarname = "95th percentile \nof dissimilarity scores" min_nparts = 40 } else { min_nparts = 20 xvarname = "Breadth index" } di_df <- di_df %>% mutate(response = (di_df[[col_name]]), facetvar = di_df[[facetvar]]) ggplot(filter(di_df, nparts > min_nparts, s0 >= min_s0), aes(response)) + geom_histogram(bins = 40, boundary = 100) + theme_bw() + ylab("") + xlab(xvarname) + ggtitle( plot_name) + facet_wrap(vars(facetvar), ncol = 1, scales = "free_y")+ theme(plot.title = element_text(size=10)) }
Only three of our datasets - Mammal Communities, Misc. Abundance, and FIA - have appreciable numbers of very small communities. We use 2000 possible SADs as a cutoff because it splits these datasets into reasonably large subsets of both "large" and "small" communities.
Proportions of communities in these datasets with fewer than 2000 possible SADs:
all_di <- all_di %>% mutate( `Number of elements` = ifelse(nparts < 2000, "Less than 2000", "More than 2000")) diss_results <- all_di %>% filter(nparts > 20) %>% group_by(Dataset, `Number of elements` ) %>% summarize(high_diss = mean(real_po_percentile_excl > 95), nsites = dplyr::n()) %>% ungroup() %>% mutate(`High dissimilarity` = paste0(signif(100 * high_diss, 2), "%; n = ", nsites)) %>% select(-nsites) skew_results <- all_di %>% filter(nparts > 40, s0 > 2) %>% group_by(Dataset, `Number of elements` ) %>% summarize(high_skew = mean(skew_percentile_excl > 97.5), low_skew = mean(skew_percentile < 2.5), nsites = dplyr::n()) %>% ungroup() %>% mutate(`High skew` = paste0(signif(100 * high_skew, 2), "%; n = ", nsites), `Low skew` = paste0(signif(100 * low_skew, 2), "%; n = ", nsites)) %>% select(-nsites) metric_results <- all_di %>% filter(nparts > 40) %>% group_by(Dataset, `Number of elements` ) %>% summarize(high_simpson = mean(simpson_percentile_excl > 97.5), low_simpson = mean(simpson_percentile < 2.5), high_shannon = mean(shannon_percentile_excl > 97.5), low_shannon = mean(shannon_percentile < 2.5), high_nsingletons = mean(nsingletons_percentile_excl > 97.5), low_nsingletons = mean(nsingletons_percentile < 2.5), nsites = dplyr::n()) %>% ungroup() %>% mutate(`High Simpson` = paste0(signif(100 * high_simpson, 2), "%; n = ", nsites), `Low Simpson` = paste0(signif(100 * low_simpson, 2), "%; n = ", nsites), `High Shannon` = paste0(signif(100 * high_shannon, 2), "%; n = ", nsites), `Low Shannon` = paste0(signif(100 * low_shannon, 2), "%; n = ", nsites), `High proportion of rare species` = paste0(signif(100 * high_nsingletons, 2), "%; n = ", nsites), `Low proportion of rare species` = paste0(signif(100 * low_nsingletons, 2), "%; n = ", nsites)) %>% select(-nsites) all_results <- left_join(diss_results, skew_results) %>% left_join(metric_results) cols1 <- c("Dataset", "Number of elements", "High dissimilarity", "High proportion of rare species", "High skew", "Low Simpson", "Low Shannon") cols2 <- c("Dataset", "Number of elements", "Low proportion of rare species", "Low skew", "High Simpson", "High Shannon") all_di %>% filter(dat %in% c("fia", "mcdb", "misc_abund")) %>% group_by(Dataset) %>% summarize(`Proportion with < 2000 possible SADs` = mean(nparts < 2000))
The proportions of communities with >/< 2000 possible SADs with extreme percentile scores for each dataset.
For dissimilarity, approximately 5% of communities would have extreme scores by chance. For all other metrics, this would be approximately 2.5%.
In the direction of effects usually seen:
all_results %>% filter(Dataset %in% c("FIA", "Mammal Communities", "Misc. Abundance")) %>% select(cols1)
In the opposite direction:
all_results %>% filter(Dataset %in% c("FIA", "Mammal Communities", "Misc. Abundance")) %>% select(cols2)
Breadth indices (top) and percentile scores (bottom) for communities with fewer than 2000 possible SADs, or more than 2000 possible SADs, from FIA, Mammal Community, and Misc. Abundance datasets.
nhist <- plot_narrowness_hist(filter(all_di, dat %in% c("fia", "mcdb", "misc_abund")), "sim_pos_from_best_95", "Dissimilarity to the central tendency") + facet_wrap(vars(`Number of elements`, Dataset), scales = "free_y", nrow = 2, ncol = 3) + xlab("95th percentile of scores for dissimilarity to the central tendency \nfor elements of the feasible set") + ylab("Number of communities") + ggtitle("") phist <- plot_percentile_hist(filter(all_di, dat %in% c("fia", "mcdb", "misc_abund")), "real_po_percentile_mean", "Dissimilarity to the central tendency") + facet_wrap(vars(`Number of elements`, Dataset), scales = "free_y", nrow = 2, ncol = 3) + xlab("Percentile rank of observed value relative to feasible set") + ylab("Number of communities") + ggtitle("") nskew <- plot_narrowness_hist(filter(all_di, dat %in% c("fia", "mcdb", "misc_abund")), "skew_95_ratio_2t", "Skewness") + facet_wrap(vars(`Number of elements`, Dataset), scales = "free_y", nrow = 2, ncol = 3)+ xlab("Breadth index") + ylab("Number of communities") + ggtitle("") pskew <- plot_percentile_hist(filter(all_di, dat %in% c("fia", "mcdb", "misc_abund")), "skew_percentile_mean", "Skewness") + facet_wrap(vars(`Number of elements`, Dataset), scales = "free_y", nrow = 2, ncol = 3)+ xlab("Percentile rank of observed value relative to feasible set") + ylab("Number of communities") + ggtitle("") nrare <- plot_narrowness_hist(filter(all_di, dat %in% c("fia", "mcdb", "misc_abund")), "nsingletons_95_ratio_2t", "Number of rare species") + facet_wrap(vars(`Number of elements`, Dataset), scales = "free_y", nrow = 2, ncol = 3)+ xlab("Breadth index") + ylab("Number of communities") + ggtitle("") prare <- plot_percentile_hist(filter(all_di, dat %in% c("fia", "mcdb", "misc_abund")), "nsingletons_percentile_mean", "Number of rare species") + facet_wrap(vars(`Number of elements`, Dataset), scales = "free_y", nrow = 2, ncol = 3)+ xlab("Percentile rank of observed value relative to feasible set") + ylab("Number of communities") + ggtitle("") nsimp <- plot_narrowness_hist(filter(all_di, dat %in% c("fia", "mcdb", "misc_abund")), "simpson_95_ratio_2t", "Simpson") + facet_wrap(vars(`Number of elements`, Dataset), scales = "free_y", nrow = 2, ncol = 3)+ xlab("Breadth index") + ylab("Number of communities") + ggtitle("") psimp <- plot_percentile_hist(filter(all_di, dat %in% c("fia", "mcdb", "misc_abund")), "simpson_percentile_mean", "Simpson") + facet_wrap(vars(`Number of elements`, Dataset), scales = "free_y", nrow = 2, ncol = 3)+ xlab("Percentile rank of observed value relative to feasible set") + ylab("Number of communities") + ggtitle("") nshan <- plot_narrowness_hist(filter(all_di, dat %in% c("fia", "mcdb", "misc_abund")), "shannon_95_ratio_2t", "Shannon") + facet_wrap(vars(`Number of elements`, Dataset), scales = "free_y", nrow = 2, ncol = 3)+ xlab("Breadth index") + ylab("Number of communities") + ggtitle("") pshan <- plot_percentile_hist(filter(all_di, dat %in% c("fia", "mcdb", "misc_abund")), "shannon_percentile_mean", "Shannon") + facet_wrap(vars(`Number of elements`, Dataset), scales = "free_y", nrow = 2, ncol = 3)+ xlab("Percentile rank of observed value relative to feasible set") + ylab("Number of communities") + ggtitle("") gridExtra::grid.arrange(grobs = list(nhist, phist), nrow = 2, top = textGrob("Dissimilarity to the central tendency")) gridExtra::grid.arrange(grobs = list(nskew, pskew), nrow = 2, top = textGrob("Skewness")) gridExtra::grid.arrange(grobs = list(nrare, prare), nrow = 2, top = textGrob("Number of rare species")) gridExtra::grid.arrange(grobs = list(nsimp, psimp), nrow = 2, top = textGrob("Simpson")) gridExtra::grid.arrange(grobs = list(nshan, pshan), nrow = 2, top = textGrob("Shannon"))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.