knitr::opts_chunk$set(echo = FALSE) library(drake) library(dplyr) library(ggplot2) library(grid) theme_set(theme_bw()) all_di <- read.csv(here::here("analysis","reports", "submission2", "all_di.csv"), stringsAsFactors = F) all_ct <- read.csv(here::here("analysis", "reports", "submission2", "all_ct.csv"), stringsAsFactors = F) #fia_ct <- read.csv(here::here("fia_cts.csv")) #all_ct <- rbind(all_ct, fia_ct) all_ct <- all_ct %>% mutate(dat = ifelse(grepl(dat, pattern = "fia"), "fia", dat), dat = ifelse(dat == "misc_abund_short", "misc_abund", dat)) all_di <- all_di %>% mutate(log_nparts = log(gmp:::as.double.bigz(nparts)), log_nsamples = log(nsamples), log_s0 = log(s0), log_n0 = log(n0)) %>% filter(n0 != s0, s0 != 1, !singletons, n0 != (s0 + 1)) %>% mutate(dat = ifelse(grepl(dat, pattern = "fia"), "fia", dat), dat = ifelse(dat == "misc_abund_short", "misc_abund", dat)) %>% mutate(Dataset = dat, Dataset = ifelse(Dataset == "fia", "FIA", Dataset), Dataset = ifelse(Dataset == "bbs", "Breeding Bird Survey", Dataset), Dataset = ifelse(Dataset == "mcdb", "Mammal Communities", Dataset), Dataset = ifelse(Dataset == "gentry", "Gentry", Dataset), Dataset = ifelse(Dataset == "misc_abund", "Misc. Abundance", Dataset)) %>% filter(nparts > 20) %>% left_join(all_ct) %>% mutate(Dataset_short = ifelse(Dataset == "FIA", "FIA", ifelse(Dataset == "Breeding Bird Survey", "BBS", ifelse(Dataset == "Mammal Communities", "Mammals", ifelse(Dataset == "Gentry", "Gentry", "Misc."))))) all_di <- all_di %>% group_by_all() %>% mutate(real_po_percentile_mean = mean(real_po_percentile, real_po_percentile_excl), skew_percentile_mean = mean(skew_percentile, skew_percentile_excl), simpson_percentile_mean = mean(simpson_percentile,simpson_percentile_excl), shannon_percentile_mean = mean(shannon_percentile, shannon_percentile_excl), nsingletons_percentile_mean = mean(nsingletons_percentile, nsingletons_percentile_excl),) %>% ungroup() all_di <- all_di %>% mutate(in_fia = ifelse(Dataset == "FIA", "FIA", "Other datasets"))
plot_percentile_hist <- function(di_df, col_name, plot_name, tails = 2, facetvar = "Dataset", min_s0 = 0) { if(tails == 2) { cutoff_percentiles = c(2.5, 97.5) min_nparts = 40 } else if (tails== 1) { cutoff_percentiles = c(95) min_nparts = 20 } di_df <- di_df %>% mutate(response = (di_df[[col_name]]), facetvar = di_df[[facetvar]]) ggplot(filter(di_df, nparts > min_nparts, s0 >= min_s0), aes(response)) + geom_histogram(bins = 40, boundary = 100) + theme_bw() + xlab("") + ylab("") + geom_vline(xintercept = cutoff_percentiles, color = "red") + ggtitle( plot_name) + facet_wrap(vars(facetvar), ncol = 1, scales = "free_y")+ theme(plot.title = element_text(size=9), plot.margin=unit(c(1, 0, -5, -1), units = "mm"), axis.text = element_text(size = 6), strip.text = element_text(size = 7), strip.placement = "inside") + scale_y_continuous(n.breaks = 3) } all_di <- all_di %>% mutate(`Observed \npercentile score` = ifelse(real_po_percentile_excl > 95, "High (> 95)", "Less than 95"))
plot_narrowness <- function(di_df, yvar, yvar_name) { if(grepl("sim_pos", yvar)) { min_nparts = 20 ylabel = "Mean dissimilarity" } else { min_nparts = 40 ylabel = "Breadth index" } di_df <- di_df %>% mutate(response = (di_df[[yvar]])) %>% filter(nparts > min_nparts, nparts < 10 ^ 50) ggplot(di_df, aes(nparts, response, color = Dataset)) + geom_point(data = filter(di_df, dat == "fia"), alpha = .1) + geom_point(data = filter(di_df, dat != "fia"), alpha = .1) + geom_point(data = filter(di_df, s0 < 0)) + scale_color_viridis_d(end = .9) + xlab("") + ylab(ylabel) + scale_x_log10() + ggtitle(yvar_name) + theme(legend.position = "none")+ theme(plot.title = element_text(size=10)) + scale_y_continuous(nbreaks =3) }
all_di_with_resamples <- read.csv(here::here("analysis", "reports", "submission2","all_di.csv")) jk_di_full <- read.csv(here::here("analysis", "reports", "submission2", "all_di_jk.csv")) all_ct_with_resamples <- read.csv(here::here("analysis", "reports", "submission2", "all_ct.csv")) jk_ct <- read.csv(here::here("analysis", "reports", "submission2", "all_ct_jk.csv")) jk_di_full <- left_join(jk_di_full, jk_ct) all_di_with_resamples <- left_join(all_di_with_resamples, all_ct_with_resamples) jk_di <- jk_di_full %>% group_by_all() %>% mutate(site_source = unlist(strsplit(site, "_")[[1]][[1]])) %>% ungroup() %>% select(dat, site, site_source, s0, n0, skew_percentile_excl,skew_percentile, simpson_percentile, simpson_percentile_excl, shannon_percentile, shannon_percentile_excl, nsingletons_percentile, nsingletons_percentile_excl, real_po_percentile, real_po_percentile_excl, sim_pos_from_best, skew_95_ratio_2t, simpson_95_ratio_2t, shannon_95_ratio_2t, nparts) skewcols <- colnames(jk_di)[ grepl("skew", colnames(jk_di))] jk_di[ which(jk_di$s0 < 3), skewcols] <- NA jk_di_means <- jk_di %>% filter(n0 != s0, s0 != 1, n0 != (s0 + 1)) %>% group_by(dat, site_source) %>% mutate(njks = dplyr::n(), njks_skew = sum(!is.na(skew_percentile)))%>% summarise_at(vars(-group_cols(), -site), mean, na.rm = T) %>% mutate(resampling = "Subsampling") %>% mutate(site = as.numeric(site_source)) %>% # select(-site_source) %>% mutate(dat = ifelse(grepl(dat, pattern = "fia"), "fia", dat), dat = ifelse(dat == "misc_abund_short", "misc_abund", dat)) %>% mutate(Dataset = dat, Dataset = ifelse(Dataset == "fia", "FIA", Dataset), Dataset = ifelse(Dataset == "bbs", "Breeding Bird Survey", Dataset), Dataset = ifelse(Dataset == "mcdb", "Mammal Communities", Dataset), Dataset = ifelse(Dataset == "gentry", "Gentry", Dataset), Dataset = ifelse(Dataset == "misc_abund", "Misc. Abundance", Dataset)) %>% filter(nparts > 20, njks == 10) %>% ungroup() jk_di[ which(jk_di$njks_skew < 10), skewcols] <- NA skewcols <- colnames(all_di_with_resamples[ which(grepl("skew", colnames(all_di_with_resamples)))]) all_di_with_resamples[ which(all_di_with_resamples$s0 < 3), skewcols] <- NA all_di_with_resamples <- all_di_with_resamples %>% mutate(log_nparts = log(gmp:::as.double.bigz(nparts)), log_nsamples = log(nsamples), log_s0 = log(s0), log_n0 = log(n0)) %>% filter(n0 != s0, s0 != 1, # !singletons, n0 != (s0 + 1)) %>% mutate(dat = ifelse(grepl(dat, pattern = "fia"), "fia", dat), dat = ifelse(dat == "misc_abund_short", "misc_abund", dat)) %>% mutate(Dataset = dat, Dataset = ifelse(Dataset == "fia", "FIA", Dataset), Dataset = ifelse(Dataset == "bbs", "Breeding Bird Survey", Dataset), Dataset = ifelse(Dataset == "mcdb", "Mammal Communities", Dataset), Dataset = ifelse(Dataset == "gentry", "Gentry", Dataset), Dataset = ifelse(Dataset == "misc_abund", "Misc. Abundance", Dataset)) %>% filter(nparts > 20) %>% mutate(resampling = ifelse(singletons, "Rare species", "Raw")) %>% bind_rows(jk_di_means) metric_rs_results <- all_di_with_resamples %>% group_by(Dataset, resampling) %>% summarize( skew_high = mean(skew_percentile_excl > 97.5, na.rm = T), skew_low = mean(skew_percentile < 2.5, na.rm = T), even_high = mean(simpson_percentile_excl > 97.5, na.rm = T), even_low = mean(simpson_percentile < 2.5, na.rm = T), shannon_high = mean(shannon_percentile_excl > 97.5, na.rm = T), shannon_low = mean(shannon_percentile < 2.5, na.rm = T), nsingletons_high = mean(nsingletons_percentile_excl > 97.5, na.rm = T), nsingletons_low = mean(nsingletons_percentile < 2.5, na.rm = T), nskew = sum(!is.na(skew_percentile)), nother = dplyr::n() ) %>% ungroup() diss_rs_results <- all_di_with_resamples %>% filter(nparts > 40) %>% group_by(Dataset, resampling) %>% summarize(dissimilarity_high = mean(real_po_percentile_excl > 95, na.rm = T), ndiss = sum(!is.na(real_po_percentile_excl))) %>% ungroup() rs_results <- left_join(metric_rs_results, diss_rs_results) #metric_cols <- colnames(rs_results [ grepl("_", colnames(rs_results))]) as.perc <- function(x) { return((signif(100 * x, digits = 2))) } rs_results_desc <- rs_results %>% mutate(`High dissimilarity` = paste0(as.perc(dissimilarity_high), "%; n =", ndiss), `High proportion of rare species` = paste0(as.perc(nsingletons_high), "%; n =", nother), `High skew` = paste0(as.perc(skew_high), "%; n = ", nskew), `Low Simpson` = paste0(as.perc(even_low), "%; n = ", nother), `Low Shannon` = paste0(as.perc(shannon_low), "%; n = ", nother), `Low proportion of rare species` = paste0(as.perc(nsingletons_low), "%; n =", nother), `Low skew` = paste0(as.perc(skew_low), "%; n = ", nskew), `High Simpson` = paste0(as.perc(even_high), "%; n = ", nother), `High Shannon` = paste0(as.perc(shannon_high), "%; n = ", nother), `Resampling scheme` = ordered(resampling, levels = c("Raw", "Subsampling", "Rare species"))) %>% arrange(Dataset, `Resampling scheme`) cols1 <- c("Dataset", "Resampling scheme", "High dissimilarity", "High proportion of rare species", "High skew", "Low Simpson", "Low Shannon") cols2 <- c("Dataset","Resampling scheme", "Low proportion of rare species", "Low skew", "High Simpson", "High Shannon")
rs_results_long <- rs_results %>% tidyr::pivot_longer((-c("Dataset","resampling")), names_to = "metric", values_to = "value") %>% filter(grepl("_", metric)) %>% mutate(ometric = ordered(metric, levels = c("skew_high", "skew_low", "even_low", "even_high", "shannon_low", "shannon_high", "nsingletons_high", "nsingletons_low", "dissimilarity_high")), ometric2 = ordered(metric, levels = c("dissimilarity_high", "nsingletons_high", "skew_high", "even_low", "shannon_low", "nsingletons_low","skew_low","even_high","shannon_high"))) %>% mutate(weird_dir = as.numeric(ometric2) > 5, percentile_cutoff = ifelse(grepl("diss", metric), .05, .025),`Resampling scheme` = ordered(resampling, levels = c("Raw", "Subsampling", "Rare species"))) metric_names <- rs_results_long %>% select(ometric2) %>% distinct() %>% arrange(ometric2) %>% mutate(metric_desc =(c("High dissimilarity", "High proportion \nof rare species", "High skew", "Low Simpson", "Low Shannon", "Low proportion \nof rare species", "Low skew", "High Simpson", "High Shannon"))) %>% mutate(metric_desc = ordered(metric_desc)) rs_results_long <- left_join(rs_results_long, metric_names) # # ggplot(filter(rs_results_long, !weird_dir), aes(metric_desc, value, color = `Resampling scheme`)) + # geom_point() + # facet_wrap(vars(Dataset), scales = "fixed", ncol = 1) + # geom_point(aes(y = percentile_cutoff), shape = 95, color = "black", size = 10) + # scale_color_viridis_d(end = .8) + # ylab("Proportion of extreme observed values") + # xlab("")
all_di_with_resamples <- all_di_with_resamples %>% group_by_all() %>% mutate(real_po_percentile_mean = mean(real_po_percentile, real_po_percentile_excl, na.rm = T), skew_percentile_mean = mean(skew_percentile, skew_percentile_excl, na.rm = T), simpson_percentile_mean = mean(simpson_percentile,simpson_percentile_excl, na.rm = T), shannon_percentile_mean = mean(shannon_percentile, shannon_percentile_excl, na.rm = T), nsingletons_percentile_mean = mean(nsingletons_percentile, nsingletons_percentile_excl)) %>% ungroup() fig_2 <- gridExtra::grid.arrange(grobs = list( plot_percentile_hist(filter(all_di_with_resamples, resampling == "Subsampling"), "real_po_percentile_mean", "Dissimilarity to the \ncentral tendency", facetvar = "Dataset", tails = 1), plot_percentile_hist(filter(all_di_with_resamples, resampling == "Subsampling"), "nsingletons_percentile_mean", "\nNumber of rare species", facetvar = "Dataset"), plot_percentile_hist(filter(all_di_with_resamples, resampling == "Subsampling"), "skew_percentile_mean", "\nSkewness", facetvar = "Dataset", min_s0 = 3), plot_percentile_hist(filter(all_di_with_resamples, resampling == "Subsampling"), "simpson_percentile_mean", "\nSimpson evenness", facetvar = "Dataset"), plot_percentile_hist(filter(all_di_with_resamples, resampling == "Subsampling"), "shannon_percentile_mean", "\nShannon diversity", facetvar = "Dataset") ), ncol = 3, top = textGrob("Subsampling results", gp = gpar(fill = "white")), left = textGrob("Number of communities", rot = 90, gp = gpar(fill = "black")), bottom = textGrob("Percentile rank of observed value relative to feasible set"), gp = gpar(fill = "white"))
fig_2 <- gridExtra::grid.arrange(grobs = list( plot_percentile_hist(filter(all_di_with_resamples, resampling == "Rare species"), "real_po_percentile_mean", "Dissimilarity to the \ncentral tendency", facetvar = "Dataset", tails = 1), plot_percentile_hist(filter(all_di_with_resamples, resampling == "Rare species"), "nsingletons_percentile_mean", "\nNumber of rare species", facetvar = "Dataset"), plot_percentile_hist(filter(all_di_with_resamples, resampling == "Rare species"), "skew_percentile_mean", "\nSkewness", facetvar = "Dataset", min_s0 = 3), plot_percentile_hist(filter(all_di_with_resamples, resampling == "Rare species"), "simpson_percentile_mean", "\nSimpson evenness", facetvar = "Dataset"), plot_percentile_hist(filter(all_di_with_resamples, resampling == "Rare species"), "shannon_percentile_mean", "\nShannon diversity", facetvar = "Dataset") ), ncol =3, top = textGrob("Adjusted for rare species results", gp = gpar(fill = "white")), left = textGrob("Number of communities", rot = 90, gp = gpar(fill = "black")), bottom = textGrob("Percentile rank of observed value relative to feasible set"), gp = gpar(fill = "white"))
gridExtra::grid.arrange(grobs = list( ggplot(filter(rs_results_long, !weird_dir), aes(metric_desc, value, color = `Resampling scheme`)) + geom_point() + facet_wrap(vars(Dataset), scales = "fixed", ncol =5) + geom_point(aes(y = percentile_cutoff), shape = 95, color = "black", size = 10) + scale_color_viridis_d(end = .8, option = "plasma") + ylab("Proportion of extreme \nobserved values") + xlab("") +ylim(0, .7) + theme(legend.position = "bottom", axis.text.x = element_text(angle = 90), plot.margin = unit(c(5, 5, 2, 2), units = "mm"))))
gridExtra::grid.arrange(grobs = list( ggplot(filter(rs_results_long, weird_dir), aes(metric_desc, value, color = `Resampling scheme`)) + geom_point() + facet_wrap(vars(Dataset), scales = "fixed", ncol =5) + geom_point(aes(y = percentile_cutoff), shape = 95, color = "black", size = 10) + scale_color_viridis_d(end = .8, option = "plasma") + ylab("Proportion of extreme \nobserved values") + xlab("") +ylim(0, .7) + theme(legend.position = "bottom", axis.text.x = element_text(angle = 90), plot.margin = unit(c(5, 5, 2, 2), units = "mm"))))
rs_results_desc %>% select(cols1)
rs_results_desc %>% select(cols2)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.