knitr::opts_chunk$set(echo = FALSE) library(ggplot2) library(meteR) library(dplyr) library(scads) set.seed(1977) nsamples <- 25
datname = "scale" this_sad <- MATSS::get_bbs_route_region_data(route = 1, region = 11)$abundance %>% tidyr::gather(key = "species", value = "abund") %>% group_by(species) %>% summarize(abund = sum(abund)) %>% ungroup() %>% select(abund) %>% arrange(abund) %>% as.matrix() %>% as.vector() nspp = length(this_sad) nind = sum(this_sad) this_sad <- data.frame( abund = this_sad, source = "emp", sim = NA, rank = c(1:nspp) )
# mete ## constuct as 50 draws then split into two groups of 25 mete_draws <- sample_METE(s = nspp, n = nind, nsamples = 2*nsamples, distinct = T) mete_samples <- list(t(mete_draws[, 1:nsamples]), t(mete_draws[, (nsamples+1):ncol(mete_draws)])) rm(mete_draws) mete_samples <- lapply(mete_samples, FUN = function(some_samples) return(some_samples %>% as.data.frame() %>% mutate(sim = row_number()) %>% tidyr::gather(-sim, key = "rank", value = "abund") %>% mutate(rank = substr(rank, 2, nchar(rank))) %>% mutate(rank = as.integer(rank), source = "mete"))) # fs # # fs_draws <- sample_feasibleset(s = nspp, n = nind, nsamples = 2*nsamples, distinct = T) # # fs_samples <- list(t(fs_draws[, 1:nsamples]), t(fs_draws[, (nsamples +1):ncol(fs_draws)])) # # rm(fs_draws) # # fs_samples <- lapply(fs_samples, FUN = function(some_samples) # return(some_samples %>% # as.data.frame() %>% # mutate(sim = row_number()) %>% # tidyr::gather(-sim, key = "rank", value = "abund") %>% # mutate(rank = substr(rank, 2, nchar(rank))) %>% # mutate(rank = as.integer(rank), # source = "fs"))) # poilog pl_best_fit <- poilog::poilogMLE(this_sad$abund) pl_draws <- replicate(n = nsamples * 150, expr = sort(poilog::rpoilog(S = nspp, mu = pl_best_fit$par[1], sig = pl_best_fit$par[2])), simplify = T) keep_draw <- vapply(pl_draws, FUN = function(draw) return(length(draw) == nspp), FUN.VALUE = TRUE) pl_draws <- pl_draws[keep_draw] pl_draws <- pl_draws[1:nsamples] names(pl_draws) <- 1:nsamples pl_samples <- as.data.frame(pl_draws) %>% t() %>% as.data.frame() %>% mutate(sim = row_number()) %>% tidyr::gather(-sim, key = "rank", value = "abund") %>% mutate(rank = substr(rank, 2, nchar(rank))) %>% mutate(rank = as.integer(rank), source = "poilog") rm(pl_draws) rm(keep_draw) rm(pl_best_fit) # logseries unconstrained by N this_esf <- meteR::meteESF(S0 = nspp, N0 = nind) this_sad <- meteR::sad(this_esf) ls_draws <- replicate(n = nsamples, expr = sort(this_sad$r(n = nspp))) %>% t() ls_samples <- apply(ls_draws, MARGIN = 2, FUN = sort) %>% as.data.frame() %>% mutate(sim = row_number()) %>% tidyr::gather(-sim, key = "rank", value = "abund")%>% mutate(rank = substr(rank, 2, nchar(rank))) %>% mutate(rank = as.integer(rank), source = "logseries") rm(ls_draws) rm(this_esf) rm(this_sad) # negative binomial negbin_bestfit <- fitdistrplus::fitdist(this_sad$abund, distr = "nbinom") negbin_samples <- replicate(n = nsamples * 100, expr = sort(rnbinom(n = nspp, size = negbin_bestfit$estimate[1], mu = negbin_bestfit$estimate[2]))) %>% t() any0 <- apply(negbin_samples, MARGIN = 1, FUN = function(draw) return(any(draw == 0))) negbin_samples <- negbin_samples[ !any0, ] negbin_samples <- negbin_samples[1:nsamples, ] rm(negbin_bestfit) rm(any0) negbin_samples <- negbin_samples %>% as.data.frame() %>% mutate(sim = row_number()) %>% tidyr::gather(-sim, key = "rank", value = "abund")%>% mutate(rank = substr(rank, 2, nchar(rank))) %>% mutate(rank = as.integer(rank), source = "nbinom") flat_sample <- data.frame( sim = NA, rank = 1:nspp, abund = sort(scads::generate_even_sad(this_sad$abund)), source = "even") exp_sample <- data.frame( sim = NA, rank = 1:nspp, abund = sort(scads::generate_exponential_sad(this_sad$abund)), source = "exponent" ) precipice_sample <- data.frame( sim = NA, rank = 1:nspp, abund = sort(scads::generate_precipice_sad(this_sad$abund)), source = "precipice") stepwise_sample <- data.frame( sim = NA, rank = 1:nspp, abund = sort(scads::generate_stepwise_sad(this_sad$abund)), source = "stepwise") all_samples <- bind_rows( this_sad, mete_samples[[1]], fs_samples[[1]], pl_samples, ls_samples, negbin_samples, flat_sample, exp_sample, precipice_sample, stepwise_sample )
raw_samples_plot <- ggplot(data = all_samples, aes(x = rank, y = abund)) + geom_point(alpha = .5) + theme_bw() + facet_wrap(facets = source ~ ., scales = "free_y") raw_samples_plot
sse_nleg <- all_samples %>% group_by(sim, source) %>% summarize(sse2 = sum((legendre_approx(abund, 2)$residuals)^2), sse3 = sum((legendre_approx(abund, 3)$residuals)^2), sse4 = sum((legendre_approx(abund, 4)$residuals)^2), sse5 = sum((legendre_approx(abund, 5)$residuals)^2), sse6 = sum((legendre_approx(abund, 6)$residuals)^2)) %>% ungroup() %>% tidyr::gather(-sim, -source, key = "nleg", value = "sse") %>% mutate(nleg = substr(nleg, 4, nchar(nleg))) %>% mutate(nleg = as.integer(nleg)) sse_nleg_plot <- ggplot(data = sse_nleg, aes(x = nleg, y = sse, color = source)) + geom_point(alpha = .5, size = 1) + theme_bw() + facet_wrap(facets = source ~ .) + theme(legend.position = "none") sse_nleg_plot
coeffs <- all_samples %>% group_by(sim, source) %>% summarize(intercept = legendre_approx(abund, nleg = 5)$coefficients[1], l1 = legendre_approx(abund, nleg = 5)$coefficients[2], l2 = legendre_approx(abund, nleg = 5)$coefficients[3], l3 = legendre_approx(abund, nleg = 5)$coefficients[4], l4 = legendre_approx(abund, nleg = 5)$coefficients[5], l5 = legendre_approx(abund, nleg = 5)$coefficients[6]) %>% ungroup() %>% tidyr::gather(-sim, -source, key = "coefficient", value = "val") coeffs_plot <- ggplot(data = coeffs, aes(x = coefficient, y = val, color = source)) + geom_point(alpha = .5) + theme_bw() + facet_wrap(facets = source ~ .) + theme(legend.position = "none") coeffs_plot
fs_reference_coeffs <- fs_samples[[2]] %>% group_by(sim, source) %>% summarize(intercept = legendre_approx(abund, nleg = 5)$coefficients[1], l1 = legendre_approx(abund, nleg = 5)$coefficients[2], l2 = legendre_approx(abund, nleg = 5)$coefficients[3], l3 = legendre_approx(abund, nleg = 5)$coefficients[4], l4 = legendre_approx(abund, nleg = 5)$coefficients[5], l5 = legendre_approx(abund, nleg = 5)$coefficients[6]) %>% ungroup() %>% tidyr::gather(-sim, -source, key = "coefficient", value = "val") fs_reference_centroid <- fs_reference_coeffs %>% select(coefficient, val) %>% group_by(coefficient) %>% summarize(val = mean(val)) %>% ungroup() %>% select(val) %>% t() %>% as.matrix() mete_reference_coeffs <- mete_samples[[2]] %>% group_by(sim, source) %>% summarize(intercept = legendre_approx(abund, nleg = 5)$coefficients[1], l1 = legendre_approx(abund, nleg = 5)$coefficients[2], l2 = legendre_approx(abund, nleg = 5)$coefficients[3], l3 = legendre_approx(abund, nleg = 5)$coefficients[4], l4 = legendre_approx(abund, nleg = 5)$coefficients[5], l5 = legendre_approx(abund, nleg = 5)$coefficients[6]) %>% ungroup() %>% tidyr::gather(-sim, -source, key = "coefficient", value = "val") mete_reference_centroid <- mete_reference_coeffs %>% select(coefficient, val) %>% group_by(coefficient) %>% summarize(val = mean(val)) %>% ungroup() %>% select(val) %>% t() %>% as.matrix()
centroid_dist <- coeffs %>% group_by(sim, source) %>% summarize(fs = eucl_rows(val, fs_reference_centroid[1, ]), mete = eucl_rows(val, mete_reference_centroid[1, ])) %>% ungroup() %>% tidyr::gather(-sim, -source, key = "reference_centroid", value = "distance") centroid_dist_plot <- ggplot(data = centroid_dist, aes(x = source, y = distance, color = source)) + geom_boxplot() + theme_bw() + theme(legend.position = "none") + facet_wrap(facets = reference_centroid ~ .) centroid_dist_plot
save.image(file = here::here("reports", "comparison_functions_stash", paste0(datname, "_stash.RData"))) save(centroid_dist, file = here::here("reports", "comparison_functions_stash", paste0(datname, "_centroid_dist.Rds")))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.