In diazrenata/scads: What the Package Does (One Line, Title Case)

knitr::opts_chunk$set(echo = FALSE)
library(ggplot2)
library(meteR)
library(dplyr)
library(scads)

set.seed(1994)
nsamples <- 25

datname = "rodents"
load(file = here::here("reports", "comparison_functions_stash", paste0(datname, "_stash.RData")))
portal_data <- isds::get_toy_portal_data(download=T, years = c(1990:1995))

portal_sad <- portal_data %>%
  select(species) %>%
  group_by(species) %>%
  summarize(abund = n()) %>%
  ungroup() %>%
  select(abund) %>%
  arrange(abund) %>%
  as.matrix() %>%
  as.vector()

nspp = length(portal_sad)
nind = sum(portal_sad)

portal_sad <- data.frame(
  abund = portal_sad,
  source = "emp",
  rank = 1:length(portal_sad)
)

# mete
## constuct as 50 draws then split into two groups of 25
mete_draws <- sample_METE(s = nspp, n = nind, nsamples = 2*nsamples, distinct = T)

mete_samples <- list(t(mete_draws[, 1:nsamples]), t(mete_draws[, (nsamples+1):ncol(mete_draws)]))

rm(mete_draws)

mete_samples <- lapply(mete_samples, FUN = function(some_samples)
  return(some_samples %>%
           as.data.frame() %>%
           mutate(sim = row_number()) %>%
           tidyr::gather(-sim, key = "rank", value = "abund") %>%
           mutate(rank = substr(rank, 2, nchar(rank))) %>%
           mutate(rank = as.integer(rank),
                  source = "mete")))

# fs
# 
# fs_draws <- sample_feasibleset(s = nspp, n = nind, nsamples = 2*nsamples, distinct = T)
# 
# fs_samples <- list(t(fs_draws[, 1:nsamples]), t(fs_draws[, (nsamples +1):ncol(fs_draws)]))
# 
# rm(fs_draws)
# 
# fs_samples <- lapply(fs_samples, FUN = function(some_samples)
#   return(some_samples %>%
#            as.data.frame() %>%
#            mutate(sim = row_number()) %>%
#            tidyr::gather(-sim, key = "rank", value = "abund") %>%
#            mutate(rank = substr(rank, 2, nchar(rank))) %>%
#            mutate(rank = as.integer(rank),
#                   source = "fs")))

# poilog

pl_best_fit <- poilog::poilogMLE(portal_sad$abund)

pl_draws <- replicate(n = nsamples * 8, expr = sort(poilog::rpoilog(S = nspp, mu = pl_best_fit$par[1], sig = pl_best_fit$par[2])), simplify = T)

keep_draw <- vapply(pl_draws, 
                    FUN = function(draw) 
                      return(length(draw) == nspp),
                    FUN.VALUE = TRUE)

pl_draws <- pl_draws[keep_draw]
pl_draws <- pl_draws[1:nsamples]
names(pl_draws) <- 1:nsamples

pl_samples <- as.data.frame(pl_draws) %>%
  t() %>%
  as.data.frame() %>%
  mutate(sim = row_number()) %>%
  tidyr::gather(-sim, key = "rank", value = "abund") %>%
  mutate(rank = substr(rank, 2, nchar(rank))) %>%
  mutate(rank = as.integer(rank),
         source = "poilog") 

rm(pl_draws)
rm(keep_draw)
rm(pl_best_fit)



# logseries unconstrained by N

this_esf <- meteR::meteESF(S0 = nspp, N0 = nind)
this_sad <- meteR::sad(this_esf)

ls_draws <- replicate(n = nsamples,
                      expr = sort(this_sad$r(n = nspp))) %>%
  t()
ls_samples <- apply(ls_draws, MARGIN = 2, FUN = sort) %>%
  as.data.frame() %>%
  mutate(sim = row_number()) %>%
  tidyr::gather(-sim, key = "rank", value = "abund")%>%
  mutate(rank = substr(rank, 2, nchar(rank))) %>%
  mutate(rank = as.integer(rank),
         source = "logseries") 


rm(ls_draws)
rm(this_esf)
rm(this_sad)

# negative binomial
negbin_bestfit <- fitdistrplus::fitdist(portal_sad$abund, distr = "nbinom")

negbin_samples <- replicate(n = nsamples * 5, expr = sort(rnbinom(n = nspp,
                                                                  size = negbin_bestfit$estimate[1], mu = negbin_bestfit$estimate[2]))) %>% 
  t()

any0 <- apply(negbin_samples, MARGIN = 1, FUN = function(draw) return(any(draw == 0)))

negbin_samples <- negbin_samples[ !any0, ]
negbin_samples <- negbin_samples[1:nsamples, ]

rm(negbin_bestfit)
rm(any0)

negbin_samples <- negbin_samples %>%
  as.data.frame() %>%
  mutate(sim = row_number()) %>%
  tidyr::gather(-sim, key = "rank", value = "abund")%>%
  mutate(rank = substr(rank, 2, nchar(rank))) %>%
  mutate(rank = as.integer(rank),
         source = "nbinom") 

flat_sample <- data.frame(
  sim = NA,
  rank = 1:nspp,
  abund = sort(scads::generate_even_sad(portal_sad$abund)),
  source = "even")

exp_sample <- data.frame(
  sim = NA,
  rank = 1:nspp,
  abund = sort(scads::generate_exponential_sad(portal_sad$abund)),
  source = "exponent"
)

precipice_sample <-  data.frame(
  sim = NA,
  rank = 1:nspp,
  abund = sort(scads::generate_precipice_sad(portal_sad$abund)),
  source = "precipice")

stepwise_sample <-  data.frame(
  sim = NA,
  rank = 1:nspp,
  abund = sort(scads::generate_stepwise_sad(portal_sad$abund)),
  source = "stepwise")

all_samples <- bind_rows(
  portal_sad, 
  mete_samples[[1]], 
  fs_samples[[1]],
  pl_samples,
  ls_samples,
  negbin_samples,
  flat_sample,
  exp_sample,
  precipice_sample,
  stepwise_sample
)

Raw rank abundance curves

raw_samples_plot <- ggplot(data = all_samples, aes(x = rank, y = abund)) +
  geom_point(alpha = .5) +
  theme_bw() +
  facet_wrap(facets = source ~ ., scales = "free_y")

raw_samples_plot

Sum of squared errors vs. number of polynomials used

sse_nleg <- all_samples %>%
  group_by(sim, source) %>%
  summarize(sse2 = sum((legendre_approx(abund, 2)$residuals)^2),
            sse3 = sum((legendre_approx(abund, 3)$residuals)^2),
            sse4 = sum((legendre_approx(abund, 4)$residuals)^2),
            sse5 = sum((legendre_approx(abund, 5)$residuals)^2),
            sse6 = sum((legendre_approx(abund, 6)$residuals)^2)) %>%
  ungroup() %>%
  tidyr::gather(-sim, -source, key = "nleg", value = "sse") %>%
  mutate(nleg = substr(nleg, 4, nchar(nleg))) %>%
  mutate(nleg = as.integer(nleg))

sse_nleg_plot <- ggplot(data = sse_nleg, aes(x = nleg, y = sse, color = source)) +
  geom_point(alpha = .5, size = 1) +
  theme_bw() +
  facet_wrap(facets = source ~ .) +
  theme(legend.position = "none")

sse_nleg_plot

Fitted coefficients

coeffs <- all_samples %>%
  group_by(sim, source) %>%
  summarize(intercept = legendre_approx(abund, nleg = 5)$coefficients[1], 
            l1 = legendre_approx(abund, nleg = 5)$coefficients[2],
            l2 = legendre_approx(abund, nleg = 5)$coefficients[3],
            l3 = legendre_approx(abund, nleg = 5)$coefficients[4], 
            l4 = legendre_approx(abund, nleg = 5)$coefficients[5],
            l5 = legendre_approx(abund, nleg = 5)$coefficients[6]) %>%
  ungroup() %>%
  tidyr::gather(-sim, -source, key = "coefficient", value = "val")

coeffs_plot <- ggplot(data = coeffs, aes(x = coefficient, y = val, color = source)) +
  geom_point(alpha = .5) +
  theme_bw() +
  facet_wrap(facets = source ~ .) +
  theme(legend.position = "none")

coeffs_plot

fs_reference_coeffs <- fs_samples[[2]] %>%
  group_by(sim, source) %>%
  summarize(intercept = legendre_approx(abund, nleg = 5)$coefficients[1], 
            l1 = legendre_approx(abund, nleg = 5)$coefficients[2],
            l2 = legendre_approx(abund, nleg = 5)$coefficients[3],
            l3 = legendre_approx(abund, nleg = 5)$coefficients[4], 
            l4 = legendre_approx(abund, nleg = 5)$coefficients[5],
            l5 = legendre_approx(abund, nleg = 5)$coefficients[6]) %>%
  ungroup() %>%
  tidyr::gather(-sim, -source, key = "coefficient", value = "val")

fs_reference_centroid <- fs_reference_coeffs %>%
  select(coefficient, val) %>%
  group_by(coefficient) %>%
  summarize(val = mean(val)) %>%
  ungroup() %>%
  select(val) %>%
  t() %>%
  as.matrix()


mete_reference_coeffs <- mete_samples[[2]] %>%
  group_by(sim, source) %>%
  summarize(intercept = legendre_approx(abund, nleg = 5)$coefficients[1], 
            l1 = legendre_approx(abund, nleg = 5)$coefficients[2],
            l2 = legendre_approx(abund, nleg = 5)$coefficients[3],
            l3 = legendre_approx(abund, nleg = 5)$coefficients[4], 
            l4 = legendre_approx(abund, nleg = 5)$coefficients[5],
            l5 = legendre_approx(abund, nleg = 5)$coefficients[6]) %>%
  ungroup() %>% 
  tidyr::gather(-sim, -source, key = "coefficient", value = "val")

mete_reference_centroid <- mete_reference_coeffs %>%
  select(coefficient, val) %>%
  group_by(coefficient) %>%
  summarize(val = mean(val)) %>%
  ungroup() %>%
  select(val) %>%
  t() %>%
  as.matrix()

centroid_dist <- coeffs %>%
  group_by(sim, source) %>%
  summarize(fs = eucl_rows(val, fs_reference_centroid[1, ]),
            mete = eucl_rows(val, mete_reference_centroid[1, ])) %>%
  ungroup() %>%
  tidyr::gather(-sim, -source, key = "reference_centroid", value = "distance")


centroid_dist_plot <- ggplot(data = centroid_dist, aes(x = source, y = distance, color = source)) +
  geom_boxplot() +
  theme_bw() +
  theme(legend.position = "none") +
  facet_wrap(facets = reference_centroid ~ .,
             ncol = 1)

centroid_dist_plot

save.image(file = here::here("reports", "comparison_functions_stash", paste0(datname, "_stash.RData")))
save(centroid_dist, file = here::here("reports", "comparison_functions_stash", paste0(datname, "_centroid_dist.Rds")))