# Document-wide chunk default: do not echo chunk source in the rendered output.
# FALSE is spelled out because `F` is an ordinary, reassignable variable.
knitr::opts_chunk$set(echo = FALSE)

# Packages attached for the rest of the document. dplyr supplies filter() and
# %>%; ggplot2 supplies the plotting functions used in every chunk below.
library(drake)
library(sadspace)
library(dplyr)
library(ggplot2)
library(ggpmisc)
# Load the pre-computed summary tables from the project's "analysis" directory.
# Paths are built with here::here().
#
# Disabled load, kept for reference:
# di_df <- read.csv(here::here("analysis", "di_dfs.csv"), stringsAsFactors = FALSE)

# Table used by the nparts / nunique plots below.
fs_size_dat <- read.csv(
  here::here("analysis", "fs_size_dat.csv"),
  stringsAsFactors = FALSE
)

# Table used by the skewness and Simpson plots below.
di_summary_df <- read.csv(
  here::here("analysis", "di_summary_df.csv"),
  stringsAsFactors = FALSE
)
Here is the range of S and N space covered, and nparts in FS:
# Scatter of the sampled combinations on log(s0) vs log(n0) axes, with each
# point coloured by log_nparts.
fs_size_dat %>%
  ggplot(aes(x = log(s0), y = log(n0), color = log_nparts)) +
  geom_point() +
  scale_color_viridis_c(option = "plasma") +
  # theme() must come after theme_bw() so the legend position is not reset.
  theme_bw() +
  theme(legend.position = "top")
This is on a log scale, so up in that right corner is 1.942426e+130.
# log_nparts against log(n0 / s0). One line is drawn per distinct value of
# log(s0) (the grouping variable), and lines are coloured by log(s0).
fs_size_dat %>%
  ggplot(
    aes(
      x = log(n0 / s0),
      y = log_nparts,
      group = as.factor(log(s0)),
      color = log(s0)
    )
  ) +
  geom_line() +
  scale_color_viridis_c() +
  # theme() must come after theme_bw() so the legend position is not reset.
  theme_bw() +
  theme(legend.position = "top")
Note that the number of elements would continue to increase along each of these lines, except we hit the edge of the sampling space.
Here is the number of unique FS found, and the number of unique FS over the number that exist. Note that the number of draws was 5,000, so the maximum possible log(nunique) would be `r log(5000)`. FS that found 5,000 unique draws are covered with black dots in this plot:
# Same log(s0) vs log(n0) map, coloured by log_nunique. Rows whose log_nunique
# is at least 8.5 are over-plotted in black.
# NOTE(review): 8.5 sits just below log(5000) (about 8.517); presumably it is
# meant as the cutoff for 5,000 unique draws. Confirm before changing it.
fs_size_dat %>%
  ggplot(aes(x = log(s0), y = log(n0), color = log_nunique)) +
  geom_point() +
  geom_point(
    data = fs_size_dat %>% filter(log_nunique >= 8.5),
    color = "black"
  ) +
  scale_color_viridis_c(option = "plasma", end = .9) +
  # theme() must come after theme_bw() so the legend position is not reset.
  theme_bw() +
  theme(legend.position = "top")
Here is the number of unique samples found relative to the number possible. FS that found all the elements possible are covered with green dots:
# log(s0) vs log(n0) map coloured by log_nunique - log_nparts, i.e. the log of
# the ratio nunique / nparts. Rows where the two log columns agree are
# over-plotted in green.
#
# The agreement test uses dplyr::near() rather than `==`: both columns are
# floating-point logs read back from CSV, so values that should be equal can
# differ in the last bits and would be missed by an exact comparison.
ggplot(
  fs_size_dat,
  aes(x = log(s0), y = log(n0), color = log_nunique - log_nparts)
) +
  geom_point() +
  geom_point(
    data = filter(fs_size_dat, dplyr::near(log_nunique, log_nparts)),
    color = "green"
  ) +
  scale_color_viridis_c(
    option = "plasma",
    end = .8,
    begin = .1,
    direction = -1
  ) +
  # theme() must come after theme_bw() so the legend position is not reset.
  theme_bw() +
  theme(legend.position = "top")
Both skewness and the variability in skewness decrease with N/S, and increase with S. For a given S, a higher N/S (so higher N) will have lower skew and lower variability.
I don't think it matters very much whether you look at mean or median/range or sd.
# Rows usable for the skewness plots: s0 greater than 3, with mean_skew and
# sd_skew neither infinite nor missing. Computed once and shared by all six
# plots below (each plot previously repeated this identical filter).
skew_plot_df <- filter(
  di_summary_df,
  s0 > 3,
  !is.infinite(mean_skew),
  !is.infinite(sd_skew),
  !is.na(mean_skew),
  !is.na(sd_skew)
)

# Mean skewness over the log(s0) vs log(n0) map.
ggplot(skew_plot_df, aes(log(s0), log(n0), color = mean_skew)) +
  geom_point() +
  theme_bw() +
  scale_color_viridis_c(option = "plasma") +
  theme(legend.position = "top")

# Mean skewness against log_nparts, one line per log(s0), coloured by log(n0).
# A vertical reference line is drawn at log_nparts = 10.
ggplot(
  skew_plot_df,
  aes(log_nparts, mean_skew, color = log(n0), group = log(s0))
) +
  geom_line() +
  theme_bw() +
  geom_vline(xintercept = 10) +
  scale_color_viridis_c() +
  theme(legend.position = "top")

# Standard deviation of skewness over the log(s0) vs log(n0) map.
ggplot(skew_plot_df, aes(log(s0), log(n0), color = sd_skew)) +
  geom_point() +
  theme_bw() +
  scale_color_viridis_c(option = "plasma") +
  theme(legend.position = "top")

# Standard deviation of skewness against log_nparts.
ggplot(
  skew_plot_df,
  aes(log_nparts, sd_skew, color = log(n0), group = log(s0))
) +
  geom_line() +
  theme_bw() +
  geom_vline(xintercept = 10) +
  scale_color_viridis_c() +
  theme(legend.position = "top")

# |sd / mean| of skewness over the log(s0) vs log(n0) map.
ggplot(
  skew_plot_df,
  aes(log(s0), log(n0), color = abs(sd_skew/mean_skew))
) +
  geom_point() +
  theme_bw() +
  scale_color_viridis_c(option = "plasma") +
  theme(legend.position = "top")

# |sd / mean| of skewness against log_nparts.
ggplot(
  skew_plot_df,
  aes(log_nparts, abs(sd_skew/mean_skew), color = log(n0), group = log(s0))
) +
  geom_line() +
  theme_bw() +
  geom_vline(xintercept = 10) +
  scale_color_viridis_c() +
  theme(legend.position = "top")
# Rows usable for the Simpson plots: s0 of at least 2, with mean_simpson and
# sd_simpson neither infinite nor missing. Computed once and shared by all six
# plots below (each plot previously repeated this identical filter).
simpson_plot_df <- filter(
  di_summary_df,
  s0 >= 2,
  !is.infinite(mean_simpson),
  !is.infinite(sd_simpson),
  !is.na(mean_simpson),
  !is.na(sd_simpson)
)

# Mean Simpson value over the log(s0) vs log(n0) map.
ggplot(simpson_plot_df, aes(log(s0), log(n0), color = mean_simpson)) +
  geom_point() +
  theme_bw() +
  scale_color_viridis_c(option = "plasma") +
  theme(legend.position = "top")

# Mean Simpson value against log_nparts, one line per log(s0), coloured by
# log(n0). A vertical reference line is drawn at log_nparts = 10.
ggplot(
  simpson_plot_df,
  aes(log_nparts, mean_simpson, color = log(n0), group = log(s0))
) +
  geom_line() +
  theme_bw() +
  geom_vline(xintercept = 10) +
  scale_color_viridis_c() +
  theme(legend.position = "top")

# Standard deviation of the Simpson value over the log(s0) vs log(n0) map.
ggplot(simpson_plot_df, aes(log(s0), log(n0), color = sd_simpson)) +
  geom_point() +
  theme_bw() +
  scale_color_viridis_c(option = "plasma") +
  theme(legend.position = "top")

# Standard deviation of the Simpson value against log_nparts. The parentheses
# around sd_simpson are kept so the default y-axis label is unchanged.
ggplot(
  simpson_plot_df,
  aes(log_nparts, (sd_simpson), color = log(n0), group = log(s0))
) +
  geom_line() +
  theme_bw() +
  geom_vline(xintercept = 10) +
  scale_color_viridis_c() +
  theme(legend.position = "top")

# |sd / mean| of the Simpson value over the log(s0) vs log(n0) map.
ggplot(
  simpson_plot_df,
  aes(log(s0), log(n0), color = abs(sd_simpson/mean_simpson))
) +
  geom_point() +
  theme_bw() +
  scale_color_viridis_c(option = "plasma") +
  theme(legend.position = "top")

# |sd / mean| of the Simpson value against log_nparts.
ggplot(
  simpson_plot_df,
  aes(
    log_nparts,
    abs(sd_simpson/mean_simpson),
    color = log(n0),
    group = log(s0)
  )
) +
  geom_line() +
  theme_bw() +
  geom_vline(xintercept = 10) +
  scale_color_viridis_c() +
  theme(legend.position = "top")
scadsanalysis
# Read the all_di.csv table and keep only the rows where `singletons` is FALSE.
sa <- read.csv(
  here::here("analysis", "all_di.csv"),
  stringsAsFactors = FALSE
)
sa_sv <- sa %>%
  filter(!singletons)

# Overlay on log(s0) vs log(n0) axes: rows of fs_size_dat in blue, rows of
# sa_sv in red. The second layer inherits the x/y mapping, so sa_sv must also
# carry s0 and n0 columns.
ggplot(fs_size_dat, aes(x = log(s0), y = log(n0))) +
  geom_point(color = "blue", alpha = .5) +
  theme_bw() +
  geom_point(data = sa_sv, color = "red", alpha = .5)
The sharpness of the threshold is an artifact of the datasets we started with, which contain few low-S/high-N communities.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.