In diazrenata/scadsanalysis: What the Package Does (One Line, Title Case)

knitr::opts_chunk$set(echo = FALSE)
library(drake)
library(dplyr)
library(ggplot2)
library(grid)
theme_set(theme_bw())

all_di <- read.csv(here::here("analysis", "reports", "submission2", "all_di.csv"), stringsAsFactors = F)

all_ct <-  read.csv(here::here("analysis", "reports", "submission2", "all_ct.csv"), stringsAsFactors = F)


#fia_ct <- read.csv(here::here("fia_cts.csv"))

#all_ct <- rbind(all_ct, fia_ct)
all_ct <- all_ct %>%
  mutate(dat = ifelse(grepl(dat, pattern = "fia"), "fia", dat),
         dat = ifelse(dat == "misc_abund_short", "misc_abund", dat))

all_di <- all_di %>%
  mutate(log_nparts = log(gmp:::as.double.bigz(nparts)),
         log_nsamples = log(nsamples),
         log_s0 = log(s0),
         log_n0 = log(n0)) %>%
  filter(n0 != s0,
         s0 != 1,
         !singletons,
         n0 != (s0 + 1)) %>%
  mutate(dat = ifelse(grepl(dat, pattern = "fia"), "fia", dat),
         dat = ifelse(dat == "misc_abund_short", "misc_abund", dat)) %>%
  mutate(Dataset = dat,
         Dataset = ifelse(Dataset == "fia", "FIA", Dataset),
         Dataset = ifelse(Dataset == "bbs", "Breeding Bird Survey", Dataset),
         Dataset = ifelse(Dataset == "mcdb", "Mammal Communities", Dataset),
         Dataset = ifelse(Dataset == "gentry", "Gentry", Dataset),
         Dataset = ifelse(Dataset == "misc_abund", "Misc. Abundance", Dataset)) %>%
  filter(nparts > 20) %>%
  left_join(all_ct) 

all_di <- all_di %>%
  group_by_all() %>%
  mutate(real_po_percentile_mean = mean(real_po_percentile, real_po_percentile_excl),
         skew_percentile_mean = mean(skew_percentile, skew_percentile_excl),
         simpson_percentile_mean = mean(simpson_percentile,simpson_percentile_excl),
         shannon_percentile_mean = mean(shannon_percentile, shannon_percentile_excl),
         nsingletons_percentile_mean = mean(nsingletons_percentile, nsingletons_percentile_excl),) %>%
  ungroup()

all_di <- all_di %>%
  mutate(in_fia = ifelse(Dataset == "FIA", "FIA", "Other datasets"))

plot_percentile_hist <- function(di_df, col_name, plot_name, tails = 2, facetvar = "Dataset", min_s0 = 0) {

  if(tails == 2) {
    cutoff_percentiles = c(2.5, 97.5)
    min_nparts = 40
  } else if (tails== 1) {
    cutoff_percentiles = c(95)
    min_nparts = 20
  }

  di_df <- di_df %>%
    mutate(response = (di_df[[col_name]]),
           facetvar = di_df[[facetvar]])


  ggplot(filter(di_df, nparts > min_nparts, s0 >= min_s0), aes(response)) +
    geom_histogram(bins = 40, boundary = 100) +
    theme_bw() +
    xlab("") +
    ylab("") +
    geom_vline(xintercept = cutoff_percentiles, color = "red") +
    ggtitle( plot_name) +
    facet_wrap(vars(facetvar), ncol = 1, scales = "free_y")+
    theme(plot.title = element_text(size=10))



}

all_di <- all_di %>%
  mutate(`Observed \npercentile score` = ifelse(real_po_percentile_excl > 95, "High (> 95)", "Less than 95"))

ggplot(all_di, aes(sim_pos_from_best, real_po)) +
  geom_point(alpha = .05) +
  geom_point(data = filter(all_di, s0 < 0)) +
  geom_abline(slope = 1, intercept = 0) +
  scale_color_viridis_d(end = .8, direction = -1) +
  xlab("95th percentile for elements from feasible set") +
  ylab("Observed value") +
  ggtitle("Dissimilarity to the central tendency") +  theme(legend.position = "bottom")+
  facet_wrap(vars(Dataset))
#,
#
#
# ggplot(filter(all_di, nparts < 10 ^ 50), aes(nparts, sim_pos_from_best, color = Dataset)) +
#   geom_point(alpha = .3) +
#   geom_point(data = filter(all_di, s0 < 0)) +
#   scale_color_viridis_d(end = .9) +
#   xlab("Number of elements in the feasible set") +
#   ylab("Mean dissimilarity of feasible set to central tendency") +
#   scale_x_log10() +
#   ggtitle("Over the size of the feasible set")
# ), ncol = 1,
#top = textGrob("Dissimilarity to central tendency"))
increase <- all_di %>% filter(real_po_percentile_excl > 95) %>% mutate(difference = real_po - sim_pos_from_best_95, ratio = real_po / sim_pos_from_best_95)

Figure S4. Dissimilarity of observed and sampled SADs to the central tendency of the feasible set. Observed SADs are often much more dissimilar to the central tendency of their feasible sets (y-axis) than the 95th percentile dissimilarity of samples from the feasible set and the central tendency (x-axis). Dissimilarity can range from 0-1. The black line is the 1:1 line. Of observed SADs that are more disssimilar to the central tendency than are 95% of samples from the feasible set, the absolute increase in dissimlarity ranges from r round(min(increase$difference), 4) to r round(max(increase$difference), 3). These observed SADs are from r round(min(increase$ratio), 3) to r round(max(increase$ratio), 2) times more dissimilar to the central tendency than the 95th percentile of dissimilarity scores for samples from the feasible set.

diazrenata/scadsanalysis documentation built on May 14, 2021, 6:59 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

Tweet to @rdrrHQ

GitHub issue tracker

ian@mutexlabs.com