knitr::opts_chunk$set(echo = FALSE)
library(drake)
library(dplyr)
library(ggplot2)

all_di <- read.csv(here::here("analysis", "reports", "all_di.csv"), stringsAsFactors = F)

all_di <- all_di %>%
  mutate(log_nparts = log(gmp:::as.double.bigz(nparts)),
         log_nsamples = log(nsamples),
         log_s0 = log(s0),
         log_n0 = log(n0)) %>%
  filter(n0 != s0,
         s0 != 1,
         !singletons,
         n0 != (s0 + 1)) %>%
  mutate(dat = ifelse(grepl(dat, pattern = "fia"), "fia", dat),
         dat = ifelse(dat == "misc_abund_short", "misc_abund", dat)) %>%
  mutate(Dataset = dat,
         Dataset = ifelse(Dataset == "fia", "FIA", Dataset),
         Dataset = ifelse(Dataset == "bbs", "Breeding Bird Survey", Dataset),
         Dataset = ifelse(Dataset == "mcdb", "Mammal Communities", Dataset),
         Dataset = ifelse(Dataset == "gentry", "Gentry", Dataset),
         Dataset = ifelse(Dataset == "misc_abund", "Misc. Abundance", Dataset))

Table S6

fia_max_s = max(filter(all_di, dat == "fia")$s0)
fia_max_n = max(filter(all_di, dat == "fia")$n0)

fia = filter(all_di, dat == "fia")

small_di <- all_di %>%
  filter(s0 <= fia_max_s,
         n0 <= fia_max_n) %>%
  mutate(fia_yn = ifelse(dat == "fia", "fia", "other datasets"),
         right_corner = FALSE)

for(i in 1:nrow(small_di)) {
  if(small_di$dat[i] != "fia") {
    fia_match_s0 = filter(fia, s0 >= small_di$s0[i])

    max_n0_for_this_s0 = max(fia_match_s0$n0)

    small_di$right_corner[i] = small_di$n0[i] > max_n0_for_this_s0

  }
}


small_di <- small_di %>%
  filter(!right_corner)
not_fia <- filter(small_di, fia_yn != "fia")
fia = filter(small_di, dat == "fia")

small_di_s_n <- small_di %>%
  select(s0, n0) %>%
  distinct() %>%
  mutate(in_fia = FALSE,
         in_not_fia = FALSE)

for(i in 1:nrow(small_di_s_n)) {

  this_s0 = small_di_s_n$s0[i]
  this_n0 = small_di_s_n$n0[i]

  fia_matches = filter(fia, s0 == this_s0, n0 == this_n0)
  not_fia_matches = filter(not_fia, s0 == this_s0, n0 == this_n0)

  small_di_s_n$in_fia[i] = nrow(fia_matches) > 0 
  small_di_s_n$in_not_fia[i] = nrow(not_fia_matches) > 0 


}

small_di_s_n <- filter(small_di_s_n, in_fia, in_not_fia)

set.seed(1977)

subsamples <- list()

for(i in 1:nrow(small_di_s_n)) {

  this_s0 = small_di_s_n$s0[i]
  this_n0 = small_di_s_n$n0[i]

  fia_matches = filter(fia, s0 == this_s0, n0 == this_n0)

  not_fia_matches = filter(not_fia, s0 == this_s0, n0 == this_n0)

  n_to_draw = min(nrow(fia_matches), nrow(not_fia_matches))

  fia_draw = sample.int(n = nrow(fia_matches), size = n_to_draw, replace = F)
  not_fia_draw = sample.int(n = nrow(not_fia_matches), size = n_to_draw, replace = F)

  subsamples[[i]] <- (bind_rows(fia_matches[fia_draw,], not_fia_matches[not_fia_draw, ]))

}

sub_di <- bind_rows(subsamples) %>%
  mutate(Dataset = ifelse(fia_yn == "fia", "FIA", "Other datasets"))
sub_di %>%
  filter(s0 > 2) %>%
  group_by_all %>%
  mutate(skew_percentile_excl = ifelse(nparts >= 20, skew_percentile_excl, NA),
         simpson_percentile = ifelse(nparts >= 20, simpson_percentile, NA)) %>%
  ungroup() %>%
  group_by(Dataset) %>%
  summarize(prop_skew_high = mean(skew_percentile_excl > 95, na.rm = T),
            prop_even_low = mean(simpson_percentile < 5, na.rm = T),
            nsites_skew = sum(!is.na(skew_percentile_excl)),
            nsites_even = sum(!is.na(simpson_percentile))) %>%
   rename("Proportion of communities with skewness above 95th percentile" = prop_skew_high,
         "Number of communities analyzed for skewness" = nsites_skew,
         "Proportion of communities with evenness below 5th percentile" = prop_even_low,
         "Number of communities analyzed for evenness" = nsites_even)

Caption. Proportion of sites with highly skewed or uneven SADs for a subset of FIA communities and comparably-sized communities from other datasets. We found 371 pairs of communities for which a community from FIA had a counterpart from another dataset with exactly matching S and N. For these communities, the proportion of communities whose observed SADs are highly skewed or uneven relative to their sampled feasible sets does not differ between FIA and other datasets.



diazrenata/scadsanalysis documentation built on May 14, 2021, 6:59 p.m.