knitr::opts_chunk$set(echo = FALSE) library(drake) library(dplyr) library(ggplot2) library(grid) all_di <- read.csv(here::here("analysis", "rev_prototyping", "all_di.csv"), stringsAsFactors = F) all_di <- all_di %>% mutate(log_nparts = log(gmp:::as.double.bigz(nparts)), log_nsamples = log(nsamples), log_s0 = log(s0), log_n0 = log(n0)) %>% filter(n0 != s0, s0 != 1, !singletons, n0 != (s0 + 1)) %>% mutate(dat = ifelse(grepl(dat, pattern = "fia"), "fia", dat), dat = ifelse(dat == "misc_abund_short", "misc_abund", dat)) %>% mutate(Dataset = dat, Dataset = ifelse(Dataset == "fia", "FIA", Dataset), Dataset = ifelse(Dataset == "bbs", "Breeding Bird Survey", Dataset), Dataset = ifelse(Dataset == "mcdb", "Mammal Communities", Dataset), Dataset = ifelse(Dataset == "gentry", "Gentry", Dataset), Dataset = ifelse(Dataset == "misc_abund", "Misc. Abundance", Dataset))
To show the 95% interval, we need to load the distribution of shape metric values from the samples from the feasible set for a few communities. See rov_metric.md.
library(drake) db <- DBI::dbConnect(RSQLite::SQLite(), here::here("analysis", "drake", "drake-cache-net.sqlite")) cache <- storr::storr_dbi("datatable", "keystable", db) cache$del(key = "lock", namespace = "session") net_summary <- readd(all_di_summary, cache = cache) net_summary <- net_summary %>% mutate(log_nparts = log(gmp:::as.double.bigz(nparts))) example_fs <- readd(fs_s_44_n_13360, cache = cache) example_di <- readd(di_fs_s_44_n_13360, cache =cache) example_fs <- example_fs %>% left_join(example_di) %>% left_join(net_summary) example_fs2 <- readd(fs_s_7_n_71, cache = cache) example_di2 <- readd(di_fs_s_7_n_71, cache =cache) example_fs2 <- example_fs2 %>% left_join(example_di2) %>% left_join(net_summary) example_fs3 <- readd(fs_s_4_n_34, cache = cache) example_di3 <- readd(di_fs_s_4_n_34, cache =cache) example_fs3 <- example_fs3 %>% left_join(example_di3) %>% left_join(net_summary) breadth_plots <- list( ggplot(example_fs3, aes(rank, abund, group = sim, color = skew)) + geom_line(alpha = .25) + theme_bw() + scale_color_viridis_c(option = "plasma", end = .8) + ggtitle("Small community", subtitle = paste0("S = ", (example_fs3$s0), "; N = ", (example_fs3$n0[1]))) + theme(legend.position = "right") + xlab("Rank") + ylab("Abundance"), ggplot(example_fs3, aes(skew)) + # geom_density() + geom_histogram(bins = 50) + theme_bw() + geom_vline(xintercept = c(example_fs3$skew_95[1], example_fs3$skew_min[1]), color = "red") + ggtitle("", subtitle = paste0("Breadth index: ", round((example_fs3$skew_95_ratio_1t[1]), 2))) + xlab("Skewness") + ylab("Count"), ggplot(example_fs2, aes(rank, abund, group = sim, color = skew)) + geom_line(alpha = .1) + theme_bw() + scale_color_viridis_c(option = "plasma", end = .8) + ggtitle("Medium community", subtitle = paste0("S = ", (example_fs2$s0), "; N = ", (example_fs2$n0[1]))) + theme(legend.position = "right")+ xlab("Rank") + ylab("Abundance"), ggplot(example_fs2, aes(skew)) + # geom_density() + geom_histogram(bins = 50) + theme_bw() + geom_vline(xintercept = c(example_fs2$skew_95[1], example_fs2$skew_min[1]), color = "red") + ggtitle("", subtitle = paste0("Breadth index: ", round((example_fs2$skew_95_ratio_1t[1]), 2)))+ xlab("Skewness") + ylab("Count"), ggplot(example_fs, aes(rank, abund, group = sim, color = skew)) + geom_line(alpha = .1) + theme_bw() + scale_color_viridis_c(option = "plasma", end = .8) + ggtitle("Large community", subtitle = paste0("S = ", (example_fs$s0), "; N = ", (example_fs$n0[1]))) + theme(legend.position = "right") + ylim(0, 5000) + # Remove a very few very very uneven SADs that make the scale too big to be interpretable theme(axis.text.y = element_text(size = 6, angle = 60))+ xlab("Rank") + ylab("Abundance"), ggplot(example_fs, aes(skew)) + # geom_density() + geom_histogram(bins = 50) + theme_bw() + geom_vline(xintercept = c(example_fs$skew_95[1], example_fs$skew_min[1]), color = "red") + ggtitle("", subtitle = paste0("Breadth index: ", round((example_fs$skew_95_ratio_1t[1]), 2)))+ xlab("Skewness") + ylab("Count") ) fig_1 <- gridExtra::grid.arrange(grobs = breadth_plots, ncol = 2, top = textGrob("Figure 1", gp = gpar(fill = "white"))) plot(fig_1) DBI::dbDisconnect(db) rm(cache)
fig_2 <- gridExtra::grid.arrange(grobs = list( ggplot(filter(all_di, s0 > 2, nparts > 20), aes(skew_percentile_excl)) + geom_histogram(bins = 100) + geom_vline(xintercept = 95, color = "red") + theme_bw() + facet_wrap(vars(Dataset), scales = "free_y", ncol = 1) + ggtitle("Skewness") + xlab("") + ylab(""),# + # xlab("Percentile rank of observed value relative to feasible set") + #ylab("Number of communities") , ggplot(filter(all_di, nparts > 20), aes(simpson_percentile)) + geom_histogram(bins = 100) + geom_vline(xintercept = 5, color = "red") + theme_bw() + facet_wrap(vars(Dataset), scales = "free_y", ncol = 1) + ggtitle("Evenness") + xlab("") + ylab("")#+ #xlab("Percentile rank of observed value relative to feasible set") + # ylab("Number of communities") ), ncol = 2, top = textGrob("Figure 2", gp = gpar(fill = "white")), left = textGrob("Number of communities", rot = 90, gp = gpar(fill = "black")), bottom = textGrob("Percentile rank of observed value relative to feasible set"), gp = gpar(fill = "white"))
fig_3 <- gridExtra::grid.arrange(grobs = list( ggplot(filter(all_di, s0 > 2, nparts > 20), aes(x= skew_95_ratio_1t)) + geom_histogram() + theme_bw() + facet_wrap(vars(Dataset), scales = "free_y", ncol = 1) + xlab("") + ylab("") + ggtitle("Skewness") + xlim(0, 1), ggplot(filter(all_di, nparts > 20), aes(x = simpson_95_ratio_1t)) + geom_histogram() + theme_bw() + facet_wrap(vars(Dataset), scales = "free_y", ncol = 1) + xlab("") + ylab("") + ggtitle("Evenness") + xlim(0, 1) ), ncol = 2, top = textGrob("Figure 3", gp = gpar(fill = "white")), left = "Number of communities", bottom = "Breadth index (ranges 0-1, with 1 being very broad)") plot(fig_3)
fia_max_s = max(filter(all_di, dat == "fia")$s0) fia_max_n = max(filter(all_di, dat == "fia")$n0) fia = filter(all_di, dat == "fia") small_di <- all_di %>% filter(s0 <= fia_max_s, n0 <= fia_max_n) %>% mutate(fia_yn = ifelse(dat == "fia", "fia", "other datasets"), right_corner = FALSE) for(i in 1:nrow(small_di)) { if(small_di$dat[i] != "fia") { fia_match_s0 = filter(fia, s0 >= small_di$s0[i]) max_n0_for_this_s0 = max(fia_match_s0$n0) small_di$right_corner[i] = small_di$n0[i] > max_n0_for_this_s0 } } small_di <- small_di %>% filter(!right_corner) not_fia <- filter(small_di, fia_yn != "fia") fia = filter(small_di, dat == "fia") small_di_s_n <- small_di %>% select(s0, n0) %>% distinct() %>% mutate(in_fia = FALSE, in_not_fia = FALSE) for(i in 1:nrow(small_di_s_n)) { this_s0 = small_di_s_n$s0[i] this_n0 = small_di_s_n$n0[i] fia_matches = filter(fia, s0 == this_s0, n0 == this_n0) not_fia_matches = filter(not_fia, s0 == this_s0, n0 == this_n0) small_di_s_n$in_fia[i] = nrow(fia_matches) > 0 small_di_s_n$in_not_fia[i] = nrow(not_fia_matches) > 0 } small_di_s_n <- filter(small_di_s_n, in_fia, in_not_fia) set.seed(1977) subsamples <- list() for(i in 1:nrow(small_di_s_n)) { this_s0 = small_di_s_n$s0[i] this_n0 = small_di_s_n$n0[i] fia_matches = filter(fia, s0 == this_s0, n0 == this_n0) not_fia_matches = filter(not_fia, s0 == this_s0, n0 == this_n0) n_to_draw = min(nrow(fia_matches), nrow(not_fia_matches)) fia_draw = sample.int(n = nrow(fia_matches), size = n_to_draw, replace = F) not_fia_draw = sample.int(n = nrow(not_fia_matches), size = n_to_draw, replace = F) subsamples[[i]] <- (bind_rows(fia_matches[fia_draw,], not_fia_matches[not_fia_draw, ])) } sub_di <- bind_rows(subsamples) %>% mutate(Dataset = ifelse(fia_yn == "fia", "FIA", "Other datasets")) simpson_ks <- filter(sub_di, nparts > 40) %>% select(simpson_95_ratio_1t, Dataset, simpson_percentile, s0, n0) %>% group_by(Dataset, s0, n0) %>% mutate(pairing = dplyr::row_number()) %>% ungroup() %>% tidyr::pivot_wider(id_cols = c(s0, n0, pairing), names_from = Dataset, values_from = c(simpson_95_ratio_1t, simpson_percentile)) simpson_breadth <- ks.test(simpson_ks$simpson_95_ratio_1t_FIA, simpson_ks$`simpson_95_ratio_1t_Other datasets`) simpson_percent <- ks.test(simpson_ks$simpson_percentile_FIA, simpson_ks$`simpson_percentile_Other datasets`) simpson_breadth simpson_percent skewness_ks <- filter(sub_di, nparts > 40) %>% select(skew_95_ratio_1t, Dataset, skew_percentile, s0, n0) %>% group_by(Dataset, s0, n0) %>% mutate(pairing = dplyr::row_number()) %>% ungroup() %>% tidyr::pivot_wider(id_cols = c(s0, n0, pairing), names_from = Dataset, values_from = c(skew_95_ratio_1t, skew_percentile)) skewness_breadth <- ks.test(skewness_ks$skew_95_ratio_1t_FIA, skewness_ks$`skew_95_ratio_1t_Other datasets`) skewness_percent <- ks.test(skewness_ks$skew_percentile_FIA, skewness_ks$`skew_percentile_Other datasets`) skewness_breadth skewness_percent fig_4 <- gridExtra::grid.arrange(grobs = list( ggplot(filter(sub_di, nparts > 20), aes(skew_95_ratio_1t)) + geom_histogram()+ theme_bw() + facet_wrap(vars(Dataset), scales = "free_y") + xlab("Breadth index") + ylab("Number of communities") + ggtitle("Breadth index", subtitle = "Skewness"), ggplot(filter(sub_di, nparts > 20), aes(simpson_95_ratio_1t)) + geom_histogram()+ theme_bw() + facet_wrap(vars(Dataset), scales = "free_y") + xlab("Breadth index") + ylab("") + ggtitle("", subtitle = "Evenness"), ggplot(filter(sub_di, nparts > 20), aes(skew_percentile_excl)) + geom_histogram()+ theme_bw() + facet_wrap(vars(Dataset), scales = "free_y") + xlab("Percentile rank") + ylab("Number of communities") + ggtitle("Percentile rank", subtitle = "Skewness"), ggplot(filter(sub_di, nparts > 20), aes(simpson_percentile)) + geom_histogram()+ theme_bw() + facet_wrap(vars(Dataset), scales = "free_y") + xlab("Percentile rank") + ylab("")+ ggtitle("", subtitle = "Evenness")), ncol = 2, top = textGrob("Figure 4", gp = gpar(fill = "white"))) plot(fig_4)
pdf("figure_1.pdf", title = "Figure 1", bg = "white", paper = "letter", width = 6.5, height = 6.5) gridExtra::grid.arrange(grobs = breadth_plots, ncol = 2, top = textGrob("Figure 1", gp = gpar(fill = "white"))) dev.off() pdf("figure_2.pdf", title = "Figure 2", bg = "white", paper = "letter", width = 6.5, height = 6.5) gridExtra::grid.arrange(grobs = list( ggplot(filter(all_di, s0 > 2, nparts > 20), aes(skew_percentile_excl)) + geom_histogram(bins = 100) + geom_vline(xintercept = 95, color = "red") + theme_bw() + facet_wrap(vars(Dataset), scales = "free_y", ncol = 1) + ggtitle("Skewness") + xlab("") + ylab(""),# + # xlab("Percentile rank of observed value relative to feasible set") + #ylab("Number of communities") , ggplot(filter(all_di, nparts > 20), aes(simpson_percentile)) + geom_histogram(bins = 100) + geom_vline(xintercept = 5, color = "red") + theme_bw() + facet_wrap(vars(Dataset), scales = "free_y", ncol = 1) + ggtitle("Evenness") + xlab("") + ylab("")#+ #xlab("Percentile rank of observed value relative to feasible set") + # ylab("Number of communities") ), ncol = 2, top = textGrob("Figure 2", gp = gpar(fill = "white")), left = textGrob("Number of communities", rot = 90, gp = gpar(fill = "black")), bottom = textGrob("Percentile rank of observed value relative to feasible set"), gp = gpar(fill = "white")) dev.off() pdf("figure_3.pdf", title = "Figure 3", bg = "white", paper = "letter", width = 6.5, height = 6.5) gridExtra::grid.arrange(grobs = list( ggplot(filter(all_di, s0 > 2, nparts > 20), aes(x= skew_95_ratio_1t)) + geom_histogram() + theme_bw() + facet_wrap(vars(Dataset), scales = "free_y", ncol = 1) + xlab("") + ylab("") + ggtitle("Skewness") + xlim(0, 1), ggplot(filter(all_di, nparts > 20), aes(x = simpson_95_ratio_1t)) + geom_histogram() + theme_bw() + facet_wrap(vars(Dataset), scales = "free_y", ncol = 1) + xlab("") + ylab("") + ggtitle("Evenness") + xlim(0, 1) ), ncol = 2, top = textGrob("Figure 3", gp = gpar(fill = "white")), left = "Number of communities", bottom = "Breadth index (ranges 0-1, with 1 being very broad)") dev.off() pdf("figure_4.pdf", title = "Figure 4", bg = "white", paper = "letter", width = 6.5, height = 6.5) gridExtra::grid.arrange(grobs = list( ggplot(filter(sub_di, nparts > 20), aes(skew_95_ratio_1t)) + geom_histogram()+ theme_bw() + facet_wrap(vars(Dataset), scales = "free_y") + xlab("Breadth index") + ylab("Number of communities") + ggtitle("Breadth index", subtitle = "Skewness"), ggplot(filter(sub_di, nparts > 20), aes(simpson_95_ratio_1t)) + geom_histogram()+ theme_bw() + facet_wrap(vars(Dataset), scales = "free_y") + xlab("Breadth index") + ylab("") + ggtitle("", subtitle = "Evenness"), ggplot(filter(sub_di, nparts > 20), aes(skew_percentile_excl)) + geom_histogram()+ theme_bw() + facet_wrap(vars(Dataset), scales = "free_y") + xlab("Percentile rank") + ylab("Number of communities") + ggtitle("Percentile rank", subtitle = "Skewness"), ggplot(filter(sub_di, nparts > 20), aes(simpson_percentile)) + geom_histogram()+ theme_bw() + facet_wrap(vars(Dataset), scales = "free_y") + xlab("Percentile rank") + ylab("")+ ggtitle("", subtitle = "Evenness")), ncol = 2, top = textGrob("Figure 4", gp = gpar(fill = "white"))) dev.off()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.