In diazrenata/sadspace: Exploring SAD space

knitr::opts_chunk$set(echo = F)
library(drake)
library(sadspace)
library(dplyr)
library(ggplot2)
library(ggpmisc)

sv <- define_statevars() %>%
  assign_ptable() %>%
  dplyr::filter(p_table != "none")

#di_df <- read.csv(here::here("analysis", "di_dfs.csv"), stringsAsFactors = F)
fs_size_dat <- read.csv(here::here("analysis", "fs_size_dat.csv"), stringsAsFactors = F)
di_summary_df <- read.csv(here::here("analysis", "di_summary_df.csv"), stringsAsFactors = F)

Here is the range of S and N space covered:

ggplot(sv, aes(s0, n0, color = n0/s0)) +
  geom_point() +
  theme_bw() +
  scale_color_viridis_c(option = "plasma") +
  theme(legend.position = "top")

ggplot(sv, aes(log(s0), log(n0), color = log(n0/s0))) +
  geom_point() +
  theme_bw()+
  scale_color_viridis_c(option = "plasma")+
  theme(legend.position = "top")

Things are generally easier to see on the log axis, so let's stick with that.

Size and unique elements from FS

Here is how the size of the feasible set varies with S, N, and N/S:

# 
# fs_size_dat <- di_df %>%
#   select(s0, n0, nparts, nunique) %>%
#   distinct() %>%
#   mutate(log_nparts = log(as.numeric(nparts)),
#          log_nunique = log(nunique))

ggplot(fs_size_dat, aes(log(s0), log(n0), color =log_nparts)) +
  geom_point() + 
  theme_bw()+
  scale_color_viridis_c(option = "plasma", begin = .1, end = .8)+
  theme(legend.position = "top")

This is on a log scale, so up in that right corner is 1.942426e+130.

For now I am interested in lognparts up to about 20

ggplot(filter(fs_size_dat, log_nparts <= 20), aes(log(s0), log(n0), color =log_nparts)) +
  geom_point() + 
  theme_bw()+
  scale_color_viridis_c(option = "plasma", begin = .1, end = .8)+
  theme(legend.position = "top")

fs_size_dat <- fs_size_dat %>%
  mutate(more_than_11 = log_nparts >= 11)


ggplot(filter(fs_size_dat, log_nparts <= 20), aes(log(s0), log(n0), color =more_than_11)) +
  geom_point() + 
  theme_bw()+
  scale_color_viridis_d(option = "plasma", begin = .1, end = .8)+
  theme(legend.position = "top")

These are the communities we'll pull out:

focal_communities <- data.frame(
  s0 = c(3, 3, 3, 5, 5, 5,  21, 21, 149, 149),
  n0 = c(13, 149, 1097, 13, 149, 1097, 34, 91, 156, 245)
)


focal_communities <- focal_communities %>%
  mutate(target_name = paste0("fs_", s0, "_", n0))

## this is scrap code to pull 3000 unique sims per focal community from cache on hpg. Drawing more gives v large .csvs for large communities.
# for(i in 1:nrow(focal_communities)) {
# thisobj = get_object(focal_communities$target_name[i])
# thisobj <- thisobj %>% filter(sim %in% unique(sim)[1:3000])
# write.csv(thisobj, here::here("analysis", "reports", paste0(focal_communities$target_name[i], ".csv")), row.names = F)
# }
focal_communities <- left_join(focal_communities, fs_size_dat, by = c("s0", "n0"))

ggplot(filter(fs_size_dat, log_nparts <= 20), aes(log(s0), log(n0), color =more_than_11)) +
  geom_point(alpha = .2) + 
  geom_point(data = focal_communities, alpha = 1, shape = 8, size = 2) +
  theme_bw()+
  scale_color_viridis_d(option = "plasma", begin = .1, end = .8)+
  theme(legend.position = "top")

focal_communities <- focal_communities %>%
  mutate(file_name = here::here("analysis", "reports", paste0(target_name, ".csv")))

focal_fs <- list()
for(i in 1:nrow(focal_communities)) {
  focal_fs[[i]] <- read.csv(focal_communities$file_name[i], stringsAsFactors = F)

  focal_fs[[i]] <-  focal_fs[[i]] %>%
    group_by(sim, nparts, s0, n0) %>%
    mutate(skew = e1071::skewness(abund),
           simpson = vegan::diversity(abund, index = "simpson")) %>%
    ungroup() %>%
    mutate(s0_f = as.factor(s0),
           n0_f = as.factor(n0))
}

focal_fs <- bind_rows(focal_fs)

focal_fs <- mutate(focal_fs, more_than_11 = log(as.numeric(nparts)) >= 11)

focal_fs_summary <- focal_fs %>%
  select(-rank, -abund, -s0_f, -n0_f) %>%
  distinct() %>%
  mutate(community = paste0(s0, "_", n0))

Here are the FS of those distributions:

ggplot(focal_fs, aes(x = rank, y = abund, group = sim, color = more_than_11)) +
  geom_line(alpha = .2) +
  facet_wrap(vars(s0_f, n0_f), scales = "free") +
  theme_bw()

Here are histograms of skew/even for these distributions:

ggplot(focal_fs_summary, aes(x = skew, fill = more_than_11)) +
  geom_histogram() +
  theme_bw() +
  facet_grid(rows = vars(n0), cols = vars(s0), scales = "free")

ggplot(focal_fs_summary, aes(x = simpson, fill = more_than_11)) +
  geom_histogram() +
  theme_bw() +
  facet_grid(rows = vars(n0), cols = vars(s0), scales = "free")