knitr::opts_chunk$set(echo = FALSE) library(drake) library(dplyr) library(ggplot2) library(grid) theme_set(theme_bw()) all_di <- read.csv(here::here("analysis", "reports", "submission2", "all_di.csv"), stringsAsFactors = F) all_ct <- read.csv(here::here("analysis", "reports", "submission2","all_ct.csv"), stringsAsFactors = F) all_ct <- all_ct %>% mutate(dat = ifelse(grepl(dat, pattern = "fia"), "fia", dat), dat = ifelse(dat == "misc_abund_short", "misc_abund", dat)) all_di <- all_di %>% mutate(log_nparts = log(gmp:::as.double.bigz(nparts)), log_nsamples = log(nsamples), log_s0 = log(s0), log_n0 = log(n0)) %>% filter(n0 != s0, s0 != 1, !singletons, n0 != (s0 + 1)) %>% mutate(dat = ifelse(grepl(dat, pattern = "fia"), "fia", dat), dat = ifelse(dat == "misc_abund_short", "misc_abund", dat)) %>% mutate(Dataset = dat, Dataset = ifelse(Dataset == "fia", "FIA", Dataset), Dataset = ifelse(Dataset == "bbs", "Breeding Bird Survey", Dataset), Dataset = ifelse(Dataset == "mcdb", "Mammal Communities", Dataset), Dataset = ifelse(Dataset == "gentry", "Gentry", Dataset), Dataset = ifelse(Dataset == "misc_abund", "Misc. Abundance", Dataset)) %>% filter(nparts > 20) %>% left_join(all_ct) %>% mutate(Dataset_short = ifelse(Dataset == "FIA", "FIA", ifelse(Dataset == "Breeding Bird Survey", "BBS", ifelse(Dataset == "Mammal Communities", "Mammals", ifelse(Dataset == "Gentry", "Gentry", "Misc."))))) all_di <- all_di %>% group_by_all() %>% mutate(real_po_percentile_mean = mean(real_po_percentile, real_po_percentile_excl), skew_percentile_mean = mean(skew_percentile, skew_percentile_excl), simpson_percentile_mean = mean(simpson_percentile,simpson_percentile_excl), shannon_percentile_mean = mean(shannon_percentile, shannon_percentile_excl), nsingletons_percentile_mean = mean(nsingletons_percentile, nsingletons_percentile_excl),) %>% ungroup() all_di <- all_di %>% mutate(in_fia = ifelse(Dataset == "FIA", "FIA", "Other datasets"))
fig1 <- ggplot(all_di, aes(x = s0, y = n0, color = Dataset)) + geom_point(data = filter(all_di, dat == "fia"), alpha = .1) + geom_point(data = filter(all_di, dat != "fia"), alpha = .2) + geom_point(data = filter(all_di, dat == "points for legend"), alpha = 1) + theme_bw() + scale_color_viridis_d(end = .9) + ggtitle("Communities by S and N") + xlab("Species richness (S); note log scale") + ylab("Total abundance (N); note log scale") + scale_x_log10() + scale_y_log10() + theme(legend.position = c(.3,.8), legend.justification = "center", legend.background = element_blank(), legend.text = element_text(size = 8), legend.title = element_text(size = 9), legend.key.size = unit(2, units = "mm"), axis.text = element_text(size = 7)) ggsave("figure_1.pdf", plot = fig1, device = "pdf", width = 80, height = 80, units = "mm", dpi = 800)
To show the 95% interval, we need to load the distribution of shape metric values from the samples from the feasible set for a few communities. See rov_metric.md.
library(drake) db <- DBI::dbConnect(RSQLite::SQLite(), here::here("analysis", "drake", "drake-cache-net.sqlite")) cache <- storr::storr_dbi("datatable", "keystable", db) cache$del(key = "lock", namespace = "session") net_summary <- readd(all_di_summary, cache = cache) net_summary <- net_summary %>% mutate(log_nparts = log(gmp:::as.double.bigz(nparts))) example_fs <- readd(fs_s_44_n_13360, cache = cache) example_di <- readd(di_fs_s_44_n_13360, cache =cache) example_fs <- example_fs %>% left_join(example_di) %>% left_join(net_summary) example_fs2 <- readd(fs_s_13_n_315, cache = cache) example_di2 <- readd(di_fs_s_13_n_315, cache =cache) example_fs2 <- example_fs2 %>% left_join(example_di2) %>% left_join(net_summary) example_fs3 <- readd(fs_s_4_n_34, cache = cache) example_di3 <- readd(di_fs_s_4_n_34, cache =cache) example_fs3 <- example_fs3 %>% left_join(example_di3) %>% left_join(net_summary) breadth_plots <- list( ggplot(example_fs3, aes(rank, abund, group = sim, color = skew)) + geom_line(alpha = .25) + theme_bw() + scale_color_viridis_c(option = "plasma", end = .8) + ggtitle("Small community", subtitle = paste0("S = ", (example_fs3$s0), "; N = ", (example_fs3$n0[1]))) + theme(legend.position = "right") + xlab("Rank") + ylab("Abundance"), ggplot(example_fs3, aes(skew)) + # geom_density() + geom_histogram(bins = 50) + theme_bw() + geom_vline(xintercept = c(example_fs3$skew_97p5[1], example_fs3$skew_2p5[1]), color = "red") + ggtitle("", subtitle = paste0("Breadth index: ", round((example_fs3$skew_95_ratio_2t[1]), 2))) + xlab("Skewness") + ylab("Count"), ggplot(example_fs2, aes(rank, abund, group = sim, color = skew)) + geom_line(alpha = .1) + theme_bw() + scale_color_viridis_c(option = "plasma", end = .8) + ggtitle("Medium community", subtitle = paste0("S = ", (example_fs2$s0), "; N = ", (example_fs2$n0[1]))) + theme(legend.position = "right")+ xlab("Rank") + ylab("Abundance") + ylim(0, 200), # Remove 3 sads that make the axes too big to be interpretable ggplot(example_fs2, aes(skew)) + # geom_density() + geom_histogram(bins = 50) + theme_bw() + geom_vline(xintercept = c(example_fs2$skew_97p5[1], example_fs2$skew_2p5[1]), color = "red") + ggtitle("", subtitle = paste0("Breadth index: ", round((example_fs2$skew_95_ratio_1t[1]), 2)))+ xlab("Skewness") + ylab("Count"), ggplot(example_fs, aes(rank, abund, group = sim, color = skew)) + geom_line(alpha = .1) + theme_bw() + scale_color_viridis_c(option = "plasma", end = .8) + ggtitle("Large community", subtitle = paste0("S = ", (example_fs$s0), "; N = ", (example_fs$n0[1]))) + theme(legend.position = "right") + ylim(0, 4000) + # Remove a very few very very uneven SADs that make the scale too big to be interpretable theme(axis.text.y = element_text(size = 6, angle = 60))+ xlab("Rank") + ylab("Abundance"), ggplot(example_fs, aes(skew)) + # geom_density() + geom_histogram(bins = 50) + theme_bw() + geom_vline(xintercept = c(example_fs$skew_97p5[1], example_fs$skew_2p5[1]), color = "red") + ggtitle("", subtitle = paste0("Breadth index: ", round((example_fs$skew_95_ratio_1t[1]), 2)))+ xlab("Skewness") + ylab("Count") ) fig_2 <- gridExtra::grid.arrange(grobs = breadth_plots, ncol = 2) ggsave("figure_2.pdf", plot = fig_2, device = "pdf", width = 180, height = 120, dpi = 800, units = "mm") # signif(as.numeric(example_di$nparts), digits = 2)[1] (as.numeric(example_di2$nparts))[1] as.numeric(example_di3$nparts)[1] DBI::dbDisconnect(db) rm(cache) rm(db)
plot_percentile_hist <- function(di_df, col_name, plot_name, tails = 2, facetvar = "Dataset", min_s0 = 0) { if(tails == 2) { cutoff_percentiles = c(2.5, 97.5) min_nparts = 40 } else if (tails== 1) { cutoff_percentiles = c(95) min_nparts = 20 } di_df <- di_df %>% mutate(response = (di_df[[col_name]]), facetvar = di_df[[facetvar]]) ggplot(filter(di_df, nparts > min_nparts, s0 >= min_s0), aes(response)) + geom_histogram(bins = 40, boundary = 100) + theme_bw() + xlab("") + ylab("") + geom_vline(xintercept = cutoff_percentiles, color = "red") + ggtitle( plot_name) + facet_wrap(vars(facetvar), ncol = 1, scales = "free_y")+ theme(plot.title = element_text(size=9), plot.margin=unit(c(1, 0, -5, -1), units = "mm"), axis.text = element_text(size = 6), strip.text = element_text(size = 7), strip.placement = "inside") + scale_y_continuous(n.breaks = 3) } all_di <- all_di %>% mutate(`Observed \npercentile score` = ifelse(real_po_percentile_excl > 95, "High (> 95)", "Less than 95"))
fig_3 <- gridExtra::grid.arrange(grobs = list( plot_percentile_hist(all_di, "real_po_percentile_mean", "Dissimilarity", facetvar = "Dataset", tails = 1), plot_percentile_hist(all_di, "nsingletons_percentile_mean", "Proportion of rare species", facetvar = "Dataset"), plot_percentile_hist(all_di, "skew_percentile_mean", "Skewness", facetvar = "Dataset", min_s0 = 3) + theme(plot.margin =unit(c(1, 3, -5, -1), units = "mm")), plot_percentile_hist(all_di, "simpson_percentile_mean", "Simpson evenness", facetvar = "Dataset"), plot_percentile_hist(all_di, "shannon_percentile_mean", "Shannon diversity", facetvar = "Dataset") ), ncol = 3, left = textGrob("Number of communities", rot = 90, gp = gpar(fill = "black")), bottom = textGrob("Percentile rank of observed value relative to feasible set"), gp = gpar(fill = "white")) fig_3 ggsave("figure_3.pdf", plot = fig_3, device = "pdf", width = 180, height = 180, dpi = 800, units = "mm")
diss_results <- all_di %>% filter(nparts > 20) %>% group_by(Dataset) %>% summarize(high_diss = mean(real_po_percentile_excl > 95), nsites = dplyr::n()) %>% ungroup() %>% mutate(`High dissimilarity` = paste0(signif(100 * high_diss, 2), "%; n = ", nsites)) %>% select(-nsites) skew_results <- all_di %>% filter(nparts > 40, s0 > 2) %>% group_by(Dataset) %>% summarize(high_skew = mean(skew_percentile_excl > 97.5), low_skew = mean(skew_percentile < 2.5), nsites = dplyr::n()) %>% ungroup() %>% mutate(`High skew` = paste0(signif(100 * high_skew, 2), "%; n = ", nsites), `Low skew` = paste0(signif(100 * low_skew, 2), "%; n = ", nsites)) %>% select(-nsites) metric_results <- all_di %>% filter(nparts > 40) %>% group_by(Dataset) %>% summarize(high_simpson = mean(simpson_percentile_excl > 97.5), low_simpson = mean(simpson_percentile < 2.5), high_shannon = mean(shannon_percentile_excl > 97.5), low_shannon = mean(shannon_percentile < 2.5), high_nsingletons = mean(nsingletons_percentile_excl > 97.5), low_nsingletons = mean(nsingletons_percentile < 2.5), nsites = dplyr::n()) %>% ungroup() %>% mutate(`High Simpson` = paste0(signif(100 * high_simpson, 2), "%; n = ", nsites), `Low Simpson` = paste0(signif(100 * low_simpson, 2), "%; n = ", nsites), `High Shannon` = paste0(signif(100 * high_shannon, 2), "%; n = ", nsites), `Low Shannon` = paste0(signif(100 * low_shannon, 2), "%; n = ", nsites), `High proportion of rare species` = paste0(signif(100 * high_nsingletons, 2), "%; n = ", nsites), `Low proportion of rare species` = paste0(signif(100 * low_nsingletons, 2), "%; n = ", nsites)) %>% select(-nsites) all_results <- left_join(diss_results, skew_results) %>% left_join(metric_results) cols1 <- c("Dataset", "High dissimilarity", "High proportion of rare species", "High skew", "Low Simpson", "Low Shannon") all_results %>% select(cols1) cols2 <- c("Dataset", "Low proportion of rare species", "Low skew", "High Simpson", "High Shannon") all_results %>% select(cols2)
plot_narrowness <- function(di_df, yvar, yvar_name, use_short = T) { if(grepl("sim_pos", yvar)) { min_nparts = 20 ylabel = "Mean dissimilarity" } else { min_nparts = 40 ylabel = "Breadth index" } di_df <- di_df %>% mutate(response = (di_df[[yvar]])) %>% filter(nparts > min_nparts, nparts < 10 ^ 50) if(use_short) { di_df <- di_df %>% mutate(Dataset = Dataset_short) } ggplot(di_df, aes(nparts, response, color = Dataset)) + geom_point(data = filter(di_df, dat == "fia"), alpha = .1) + geom_point(data = filter(di_df, dat != "fia"), alpha = .1) + geom_point(data = filter(di_df, s0 < 0)) + scale_color_viridis_d(end = .9) + xlab("") + ylab(ylabel) + scale_x_log10() + ggtitle(yvar_name) + theme(legend.position = "none")+ theme(plot.title = element_text(size=10)) + theme(legend.text = element_text(size = 8), plot.margin = unit(c(0, 10, 0, 2), units = "mm"), axis.text = element_text(size = 7)) }
# # fig_4 <- gridExtra::grid.arrange(grobs = list( # plot_narrowness(all_di, "sim_pos_from_best_95", "Dissimilarity \nto central tendency") + ylab("Dissimilarity \n95th percentile") + theme(legend.position = "bottom", legend.direction = "vertical" , legend.margin = margin(b = 40, r = 0, t = 10, l = 0, unit = "mm")), # gridExtra::grid.arrange(grobs = list( # plot_narrowness(all_di, "skew_95_ratio_2t", "\nSkewness"), # plot_narrowness(all_di, "simpson_95_ratio_2t", "\nSimpson evenness")+ ylab(""), # plot_narrowness(all_di, "nsingletons_95_ratio_2t", "\nProportion of rare species"), # plot_narrowness(all_di, "shannon_95_ratio_2t", "\nShannon diversity") + ylab("") # ), ncol = 2)), ncol = 2, widths = c( 2, 4),bottom = textGrob("Number of elements in the feasible set")) fig_4 <- gridExtra::grid.arrange(grobs = list( plot_narrowness(all_di, "sim_pos_from_best_95", "Dissimilarity") + ylab("Dissimilarity \n95th percentile") + theme(plot.margin =unit(c(1, 10, 0, 2), units = "mm") ), plot_narrowness(all_di, "skew_95_ratio_2t", "Skewness") + ylab("\nBreadth index"), plot_narrowness(all_di, "simpson_95_ratio_2t", "Simpson evenness")+ ylab("\nBreadth index"), plot_narrowness(all_di, "nsingletons_95_ratio_2t", "Proportion of rare species")+ ylab("\nBreadth index"), plot_narrowness(all_di, "shannon_95_ratio_2t", "Shannon diversity")+ ylab("\nBreadth index")+ theme(legend.position = "bottom", legend.direction = "horizontal", legend.key = element_blank(), legend.margin = margin(-.3,0,0,0, unit = "in"),legend.key.size = unit(.25, units = "mm"), legend.title = element_text(size = 8))), ncol = 1, bottom = textGrob("Number of elements in the feasible set")) #fig_4 # # pdf("figure_4.pdf", bg = "white", paper = "letter", width = 8, height = 5) # fig_4 # dev.set(dev.next()) # while (!is.null(dev.list())) dev.off() ggsave("figure_4.pdf", plot = fig_4, device = "pdf", width = 80, height = 180, dpi = 800, units = "mm")
plot_narrowness_hist <- function(di_df, col_name, plot_name, facetvar = "Dataset", min_s0 = 0) { if(grepl("sim_pos", col_name)) { xvarname = "Mean dissimilarity" min_nparts = 40 } else { min_nparts = 20 xvarname = "Breadth index" } di_df <- di_df %>% mutate(response = (di_df[[col_name]]), facetvar = di_df[[facetvar]]) ggplot(filter(di_df, nparts > min_nparts, s0 >= min_s0), aes(response)) + geom_histogram(bins = 40, boundary = 100) + theme_bw() + ylab("") + xlab(xvarname) + ggtitle( plot_name) + facet_wrap(vars(facetvar), ncol = 1, scales = "free_y")+ theme(plot.title = element_text(size=10)) }
plot_percentile_hist_small <- function(di_df, col_name, plot_name, tails = 2, facetvar = "Dataset", min_s0 = 0) { if(tails == 2) { cutoff_percentiles = c(2.5, 97.5) min_nparts = 40 } else if (tails== 1) { cutoff_percentiles = c(95) min_nparts = 20 } di_df <- di_df %>% mutate(response = (di_df[[col_name]]), facetvar = di_df[[facetvar]]) ggplot(filter(di_df, nparts > min_nparts, s0 >= min_s0), aes(response)) + geom_histogram(bins = 40, boundary = 100) + theme_bw() + xlab("") + ylab("") + geom_vline(xintercept = cutoff_percentiles, color = "red") + ggtitle( plot_name) + #geom_text(data = distinct(select(di_df, Dataset)), aes(x = 45, y = 100, label = Dataset)) + facet_wrap(vars(facetvar), ncol = 1, scales = "free_y")+ theme(plot.title = element_text(size=9), axis.text = element_text(size = 6), strip.text = element_text(size = 7), strip.placement = "inside") + scale_y_continuous(n.breaks = 3) } all_di <- all_di %>% mutate( `Number of elements` = ifelse(nparts < 2000, "Less than 2000 possible SADs", "More than 2000 possible SADs")) diss_results <- all_di %>% filter(nparts > 20) %>% group_by(Dataset, `Number of elements` ) %>% summarize(high_diss = mean(real_po_percentile_excl > 95), nsites = dplyr::n()) %>% ungroup() %>% mutate(`High dissimilarity` = paste0(signif(100 * high_diss, 2), "%; n = ", nsites)) %>% select(-nsites) skew_results <- all_di %>% filter(nparts > 40, s0 > 2) %>% group_by(Dataset, `Number of elements` ) %>% summarize(high_skew = mean(skew_percentile_excl > 97.5), low_skew = mean(skew_percentile < 2.5), nsites = dplyr::n()) %>% ungroup() %>% mutate(`High skew` = paste0(signif(100 * high_skew, 2), "%; n = ", nsites), `Low skew` = paste0(signif(100 * low_skew, 2), "%; n = ", nsites)) %>% select(-nsites) metric_results <- all_di %>% filter(nparts > 40) %>% group_by(Dataset, `Number of elements` ) %>% summarize(high_simpson = mean(simpson_percentile_excl > 97.5), low_simpson = mean(simpson_percentile < 2.5), high_shannon = mean(shannon_percentile_excl > 97.5), low_shannon = mean(shannon_percentile < 2.5), high_nsingletons = mean(nsingletons_percentile_excl > 97.5), low_nsingletons = mean(nsingletons_percentile < 2.5), nsites = dplyr::n()) %>% ungroup() %>% mutate(`High Simpson` = paste0(signif(100 * high_simpson, 2), "%; n = ", nsites), `Low Simpson` = paste0(signif(100 * low_simpson, 2), "%; n = ", nsites), `High Shannon` = paste0(signif(100 * high_shannon, 2), "%; n = ", nsites), `Low Shannon` = paste0(signif(100 * low_shannon, 2), "%; n = ", nsites), `High proportion of rare species` = paste0(signif(100 * high_nsingletons, 2), "%; n = ", nsites), `Low proportion of rare species` = paste0(signif(100 * low_nsingletons, 2), "%; n = ", nsites)) %>% select(-nsites) all_results <- left_join(diss_results, skew_results) %>% left_join(metric_results) cols1 <- c("Dataset", "Number of elements", "High dissimilarity", "High proportion of rare species", "High skew", "Low Simpson", "Low Shannon") all_results %>% select(cols1) all_di %>% filter(dat %in% c("fia", "mcdb", "misc_abund")) %>% group_by(Dataset) %>% summarize(`Proportion with nparts < 2000` = mean(nparts < 2000)) nhist <- plot_narrowness_hist(filter(all_di, dat %in% c("fia", "mcdb", "misc_abund")), "skew_95_ratio_2t", "Skewness") + facet_wrap(vars(`Number of elements`, Dataset), scales = "free_y", nrow = 2, ncol = 3) + xlab("Breadth index") + ylab("Number of communities") + ggtitle("Skewness") + theme(plot.margin = unit(c(0,5,0,2), units = "mm"), axis.text = element_text(size = 6) ,strip.text = element_text(size = 7), strip.placement = "inside") phist <- plot_percentile_hist_small(filter(all_di, dat %in% c("fia", "mcdb", "misc_abund")), "skew_percentile_mean", "Skewness") + facet_wrap(vars(`Number of elements`, Dataset), scales = "free_y", nrow = 2, ncol = 3) + xlab("Percentile rank of observed value \nrelative to feasible set") + ylab("Number of communities") + ggtitle("Skewness")+ theme(plot.margin = unit(c(0,5,5,2), units = "mm"), plot.title = element_text(size = 10)) fig_6 <- gridExtra::grid.arrange(grobs = list(nhist, phist), nrow = 2) fig_6 ggsave("figure_6.pdf", plot = fig_6, device = "pdf", width = 180, height = 150, dpi = 800, units = "mm")
all_di_with_resamples <- read.csv(here::here("analysis", "reports", "submission2", "all_di.csv")) jk_di_full <- read.csv(here::here("analysis", "reports", "submission2", "all_di_jk.csv")) all_ct_with_resamples <- read.csv(here::here("analysis", "reports", "submission2", "all_ct.csv")) jk_ct <- read.csv(here::here("analysis", "reports", "submission2", "all_ct_jk.csv")) jk_di_full <- left_join(jk_di_full, jk_ct) all_di_with_resamples <- left_join(all_di_with_resamples, all_ct_with_resamples) jk_di <- jk_di_full %>% group_by_all() %>% mutate(site_source = unlist(strsplit(site, "_")[[1]][[1]])) %>% ungroup() %>% select(dat, site, site_source, s0, n0, skew_percentile_excl,skew_percentile, simpson_percentile, simpson_percentile_excl, shannon_percentile, shannon_percentile_excl, nsingletons_percentile, nsingletons_percentile_excl, real_po_percentile, real_po_percentile_excl, sim_pos_from_best, skew_95_ratio_2t, simpson_95_ratio_2t, shannon_95_ratio_2t, nparts) skewcols <- colnames(jk_di)[ grepl("skew", colnames(jk_di))] jk_di[ which(jk_di$s0 < 3), skewcols] <- NA jk_di_means <- jk_di %>% filter(n0 != s0, s0 != 1, n0 != (s0 + 1)) %>% group_by(dat, site_source) %>% mutate(njks = dplyr::n(), njks_skew = sum(!is.na(skew_percentile)))%>% summarise_at(vars(-group_cols(), -site), mean, na.rm = T) %>% mutate(resampling = "Subsampling") %>% mutate(site = as.numeric(site_source)) %>% # select(-site_source) %>% mutate(dat = ifelse(grepl(dat, pattern = "fia"), "fia", dat), dat = ifelse(dat == "misc_abund_short", "misc_abund", dat)) %>% mutate(Dataset = dat, Dataset = ifelse(Dataset == "fia", "FIA", Dataset), Dataset = ifelse(Dataset == "bbs", "Breeding Bird Survey", Dataset), Dataset = ifelse(Dataset == "mcdb", "Mammal Communities", Dataset), Dataset = ifelse(Dataset == "gentry", "Gentry", Dataset), Dataset = ifelse(Dataset == "misc_abund", "Misc. Abundance", Dataset)) %>% filter(nparts > 20, njks == 10) %>% ungroup() jk_di[ which(jk_di$njks_skew < 10), skewcols] <- NA skewcols <- colnames(all_di_with_resamples[ which(grepl("skew", colnames(all_di_with_resamples)))]) all_di_with_resamples[ which(all_di_with_resamples$s0 < 3), skewcols] <- NA all_di_with_resamples <- all_di_with_resamples %>% mutate(log_nparts = log(gmp:::as.double.bigz(nparts)), log_nsamples = log(nsamples), log_s0 = log(s0), log_n0 = log(n0)) %>% filter(n0 != s0, s0 != 1, # !singletons, n0 != (s0 + 1)) %>% mutate(dat = ifelse(grepl(dat, pattern = "fia"), "fia", dat), dat = ifelse(dat == "misc_abund_short", "misc_abund", dat)) %>% mutate(Dataset = dat, Dataset = ifelse(Dataset == "fia", "FIA", Dataset), Dataset = ifelse(Dataset == "bbs", "Breeding Bird Survey", Dataset), Dataset = ifelse(Dataset == "mcdb", "Mammal Communities", Dataset), Dataset = ifelse(Dataset == "gentry", "Gentry", Dataset), Dataset = ifelse(Dataset == "misc_abund", "Misc. Abundance", Dataset)) %>% filter(nparts > 20) %>% mutate(resampling = ifelse(singletons, "Rare species", "Raw")) %>% bind_rows(jk_di_means) metric_rs_results <- all_di_with_resamples %>% group_by(Dataset, resampling) %>% summarize( skew_high = mean(skew_percentile_excl > 97.5, na.rm = T), skew_low = mean(skew_percentile < 2.5, na.rm = T), even_high = mean(simpson_percentile_excl > 97.5, na.rm = T), even_low = mean(simpson_percentile < 2.5, na.rm = T), shannon_high = mean(shannon_percentile_excl > 97.5, na.rm = T), shannon_low = mean(shannon_percentile < 2.5, na.rm = T), nsingletons_high = mean(nsingletons_percentile_excl > 97.5, na.rm = T), nsingletons_low = mean(nsingletons_percentile < 2.5, na.rm = T), nskew = sum(!is.na(skew_percentile)), nother = dplyr::n() ) %>% ungroup() diss_rs_results <- all_di_with_resamples %>% filter(nparts > 40) %>% group_by(Dataset, resampling) %>% summarize(dissimilarity_high = mean(real_po_percentile_excl > 95, na.rm = T), ndiss = sum(!is.na(real_po_percentile_excl))) %>% ungroup() rs_results <- left_join(metric_rs_results, diss_rs_results) #metric_cols <- colnames(rs_results [ grepl("_", colnames(rs_results))]) as.perc <- function(x) { return((signif(100 * x, digits = 2))) } rs_results_desc <- rs_results %>% mutate(`High dissimilarity` = paste0(as.perc(dissimilarity_high), "%; n =", ndiss), `High proportion of rare species` = paste0(as.perc(nsingletons_high), "%; n =", nother), `High skew` = paste0(as.perc(skew_high), "%; n = ", nskew), `Low Simpson` = paste0(as.perc(even_low), "%; n = ", nother), `Low Shannon` = paste0(as.perc(shannon_low), "%; n = ", nother), `Low proportion of rare species` = paste0(as.perc(nsingletons_low), "%; n =", nother), `Low skew` = paste0(as.perc(skew_low), "%; n = ", nskew), `High Simpson` = paste0(as.perc(even_high), "%; n = ", nother), `High Shannon` = paste0(as.perc(shannon_high), "%; n = ", nother), `Resampling scheme` = ordered(resampling, levels = c("Raw", "Subsampling", "Rare species"))) %>% arrange(Dataset, `Resampling scheme`) cols1 <- c("Dataset", "Resampling scheme", "High dissimilarity", "High proportion of rare species", "High skew", "Low Simpson", "Low Shannon") cols2 <- c("Dataset","Resampling scheme", "Low proportion of rare species", "Low skew", "High Simpson", "High Shannon") rs_results_desc %>% select(cols1) rs_results_desc %>% select(cols2)
rs_results_long <- rs_results %>% tidyr::pivot_longer((-c("Dataset","resampling")), names_to = "metric", values_to = "value") %>% filter(grepl("_", metric)) %>% mutate(ometric = ordered(metric, levels = c("skew_high", "skew_low", "even_low", "even_high", "shannon_low", "shannon_high", "nsingletons_high", "nsingletons_low", "dissimilarity_high")), ometric2 = ordered(metric, levels = c("dissimilarity_high", "nsingletons_high", "skew_high", "even_low", "shannon_low", "nsingletons_low","skew_low","even_high","shannon_high"))) %>% mutate(weird_dir = as.numeric(ometric2) > 5, percentile_cutoff = ifelse(grepl("diss", metric), .05, .025),`Resampling scheme` = ordered(resampling, levels = c("Raw", "Subsampling", "Rare species"))) metric_names <- rs_results_long %>% select(ometric2) %>% distinct() %>% arrange(ometric2) %>% mutate(metric_desc =(c("High dissimilarity", "High proportion \nof rare species", "High skew", "Low Simpson", "Low Shannon", "Low proportion \nof rare species", "Low skew", "High Simpson", "High Shannon"))) %>% mutate(metric_desc = ordered(metric_desc)) rs_results_long <- left_join(rs_results_long, metric_names) %>% mutate(Dataset_short = ifelse(Dataset == "FIA", "FIA", ifelse(Dataset == "Breeding Bird Survey", "BBS", ifelse(Dataset == "Mammal Communities", "Mammals", ifelse(Dataset == "Gentry", "Gentry", "Misc."))))) fig_5 <- gridExtra::grid.arrange(grobs = list( ggplot(filter(rs_results_long, !weird_dir), aes(metric_desc, value, color = `Resampling scheme`)) + geom_point() + facet_wrap(vars(Dataset_short), scales = "fixed", ncol =5) + geom_point(aes(y = percentile_cutoff), shape = 95, color = "black", size = 10) + scale_color_viridis_d(end = .8, option = "plasma") + ylab("Proportion of extreme \nobserved values") + xlab("") +ylim(0, .7) + theme(legend.position = "bottom", axis.text.x = element_text(angle = 90), plot.margin = unit(c(5, 5, 2, 2), units = "mm")))) ggsave("figure_5.pdf", device = "pdf", plot = fig_5, width = 180, height = 80, dpi = 800, units = "mm") # # pdf("figure_5.pdf", bg = "white", paper = "letter", width = 6, height = 5) # fig_5 # dev.set(dev.next()) # while (!is.null(dev.list())) dev.off() # # # gridExtra::grid.arrange(grobs = list( # ggplot(filter(rs_results_long, !grepl("Misc", Dataset), weird_dir), aes(metric_desc, value, color = `Resampling scheme`)) + # geom_point() + # facet_wrap(vars(Dataset), scales = "fixed", ncol =2) + # geom_point(aes(y = percentile_cutoff), shape = 95, color = "black", size = 10) + # scale_color_viridis_d(end = .8) + # ylab("Proportion of extreme observed values") +ylim(0, .7) + # xlab("") + theme(legend.position = "none", axis.text.x = element_text(angle = 90)) , # ggplot(filter(rs_results_long, grepl("Misc", Dataset), weird_dir), aes(metric_desc, value, color = `Resampling scheme`)) + # geom_point() + # facet_wrap(vars(Dataset), scales = "fixed", ncol = 2) + # geom_point(aes(y = percentile_cutoff), shape = 95, color = "black", size = 10) + # scale_color_viridis_d(end = .8) + # ylab("") + # xlab("") + ylim(0, .7) + # theme(legend.position = "bottom", legend.direction = "vertical" , legend.margin = margin(b = .3, r = 0, t = 0, l = 0, unit = "in"), axis.text.x = element_text(angle = 90))), ncol = 2, widths = c( 4,2), bottom = textGrob("Metric"))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.