In diazrenata/scadsanalysis: What the Package Does (One Line, Title Case)

knitr::opts_chunk$set(echo = FALSE)
library(drake)
library(dplyr)
library(ggplot2)

all_di <- read.csv(here::here("analysis", "reports", "all_di.csv"), stringsAsFactors = F)

all_di <- all_di %>%
  mutate(log_nparts = log(gmp:::as.double.bigz(nparts)),
         log_nsamples = log(nsamples),
         log150_nparts = log(gmp:::as.double.bigz(nparts), base = 150)) %>%
  mutate(prop_found = exp(log_nsamples - log_nparts)) %>%
  mutate(found_all = prop_found==1) %>%
  filter(s0 >= 2,
         n0 > s0,
         dat %in% c("bbs", "gentry", "fia_short", "fia_small", "mcdb", "misc_abund_short")) %>%
  mutate(dat = ifelse(substr(dat, 0, 3) == "fia", yes = "fia", no = dat)) %>%
  mutate(dat = ifelse(dat == "misc_abund_short", "misc_abund", dat))%>%
  mutate(is_fia = ifelse(substr(dat, 0, 3) == "fia", dat, "other"))

Datasets in S and N space

Here is where our communities fall in S and N space:

all_dat_to_plot <- filter(all_di, singletons == F) %>%
  bind_rows(mutate(filter(all_di, singletons == F), dat = "all"))

ggplot(all_dat_to_plot, aes(x = log(s0), y = log(n0), color = dat)) +
  geom_point(alpha = .5) +
  theme_bw() +
  theme(legend.position = "top") + scale_color_viridis_d(end = .9) +
  facet_wrap(vars(dat))

Here is how that translates into the size of the feasible set:

ggplot(filter(all_di, log_nparts <= 100, singletons == F), aes(x = log(s0), y = log(n0), color = log_nparts)) +
  geom_point(alpha = .5) +
  geom_point(data = filter(all_di, log_nparts > 100), color = "yellow", alpha = .5) +
  theme_bw() +
  theme(legend.position = "top") + scale_color_viridis_c(option = "magma", end = .9, begin = .1)

ggplot(filter(all_di, singletons == F), aes(x = log_nparts, y = ..count..)) +
  geom_density() +
  theme_bw() +
  geom_vline(xintercept = c(10, 15))


ggplot(filter(all_di, singletons == F), aes(x = log_nparts, y = ..count..)) +
  geom_density() +
  theme_bw() +
  geom_vline(xintercept = c(10, 15)) +
  facet_wrap(vars(dat), scales = "free")

The small datasets are basically all FIA.

Note that the color scale is log transformed, so the largest communities have e^r max(all_di$log_nparts), or r exp(max(all_di$log_nparts)), elements in the feasible set!

95 interval (one tailed) vs nparts

skew_1t = ggplot(filter(all_di, singletons == F, s0 > 2), aes(x = log(s0), y = log(n0), color = skew_95_ratio_1t)) +
  geom_point(alpha = .5) +
  theme_bw() +
  theme(legend.position = "top") + scale_color_viridis_c(option = "magma", end = .9, begin = .1)


simpson_1t = ggplot(filter(all_di, singletons == F), aes(x = log(s0), y = log(n0), color = simpson_95_ratio_1t)) +
  geom_point(alpha = .5) +
  theme_bw() +
  theme(legend.position = "top") + scale_color_viridis_c(option = "magma", end = .9, begin = .1)

gridExtra::grid.arrange(grobs = list(skew_1t, simpson_1t), nrow = 1)

Overall percentile values

ggplot(filter(all_di, !singletons, s0 > 2), aes(x = skew_percentile)) +
  geom_histogram() +
  theme_bw() +
  ggtitle("Skew percentile") +
  facet_wrap(vars(dat), scales = "free_y")


ggplot(filter(all_di, !singletons), aes(x = simpson_percentile)) +
  geom_histogram() +
  theme_bw() +
  ggtitle("Simpson percentile")+
  facet_wrap(vars(dat), scales = "free_y")

skew_outcomes <- filter(all_di, !singletons, s0 > 2) %>%
  group_by(is_fia) %>%
  summarize(high_skew_prop = mean(skew_percentile >= 95)) %>%
  ungroup()

even_outcomes <- filter(all_di, !singletons) %>%
  group_by(is_fia) %>%
  summarize(low_even_prop = mean(simpson_percentile <= 5)) %>%
  ungroup()

Within the FIA size range

fia_size_range <- filter(all_di, !singletons)

fia_max_s = max(filter(all_di, dat == "fia")$s0)
fia_max_n = max(filter(all_di, dat == "fia")$n0)

fia_size_range <- filter(fia_size_range,
                         s0 <= fia_max_s,
                         n0 <= fia_max_n) 

skew_fia <- ggplot(filter(fia_size_range, s0 > 2), aes(x = skew_percentile)) +
  geom_histogram() +
  facet_wrap(vars(is_fia), scales = "free_y") +
  theme_bw() +
  ggtitle("Skew percentile - within FIA size range")


simpson_fia <- ggplot(filter(fia_size_range), aes(x = simpson_percentile)) +
  geom_histogram() +
  facet_wrap(vars(is_fia), scales = "free_y") +
  theme_bw() +
  ggtitle("Simpson percentile - within FIA size range")

gridExtra::grid.arrange(grobs = list(skew_fia, simpson_fia), nrow = 1)

all_di <- all_di %>%
  group_by_all() %>%
  mutate(is_fia_size = (s0 <= fia_max_s) && (n0 <= fia_max_n)) %>%
  ungroup() 

all_di <- all_di %>%
  mutate(is_fia_size = ifelse(is_fia_size, "FIA size", "Larger than FIA"))

skew_ratio <- ggplot(filter(all_di, s0 > 2), aes(x = skew_95_ratio_1t)) +
  geom_histogram() +
  theme_bw() +
  facet_wrap(vars(is_fia_size), scales = "free_y") +
  ggtitle("Narrowness of skewness")

even_ratio <- ggplot(filter(all_di, s0 > 2), aes(x = simpson_95_ratio_1t)) +
  geom_histogram() +
  theme_bw() +
  facet_wrap(vars(is_fia_size), scales = "free_y") +
  ggtitle("Narrowness of evenness")

gridExtra::grid.arrange(grobs = list(skew_ratio, even_ratio), nrow = 1)

Rarefaction

rarefied <- filter(all_di, singletons) %>%
  select(dat, site, skew_percentile, simpson_percentile) %>%
  rename(skew_percentile_rarefied = skew_percentile,
         simpson_percentile_rarefied = simpson_percentile)

rarefied <- right_join(all_di, rarefied)

skew_rare <- ggplot(filter(rarefied, s0 > 2), aes(x = skew_percentile, y = skew_percentile_rarefied)) +
  geom_point(alpha = .2) +
  geom_abline(slope = 1, intercept = 0, color = "green") +
  theme_bw() +
  facet_wrap(vars(dat)) +
  ggtitle("Skew percentile change with rarefaction")


even_rare <- ggplot(filter(rarefied), aes(x = simpson_percentile, y = simpson_percentile_rarefied)) +
  geom_point(alpha = .2) +
  geom_abline(slope = 1, intercept = 0, color = "green") +
  theme_bw() +
  facet_wrap(vars(dat)) +
  ggtitle("Evenness percentile change with rarefaction")


gridExtra::grid.arrange(grobs = list(skew_rare, even_rare), nrow = 1)

skew_rare_outcome <- rarefied %>%
  filter(s0 > 2) %>%
  group_by(is_fia) %>%
  summarize(high_skew = mean(skew_percentile_rarefied >= 95)) %>%
  ungroup()
skew_rare_outcome

even_rare_outcome <- rarefied %>%
  filter(s0 > 2) %>%
  group_by(is_fia) %>%
  summarize(low_even = mean(simpson_percentile_rarefied <= 5)) %>%
  ungroup()
even_rare_outcome