# This function simulates dynamic species abundance distributions for a given
# list() input which needs the following information:
# example_input <- list(
# # (n_sp) number of species
# n_sp = 20,
# # (n_year) number of time points: can either be
# # - a scalar, giving the number of time points, or
# # - a vector of specific time points to be included in the simulation, where
# # time = "fixed" needs to be specified, see below
# n_year = 10,
# # (n_loc) number of locations: scalar. If locations have specific coordinates
# # location = "fixed", see below and pos_x and pos_y needs to be specified
# n_loc = 5,
# # (n_obs) number of "observers": scalar. the number of repeated measures for
# # each unique combination of species-location-time point.
# n_obs = 2,
# # (mu) mean log abundance of species in the community, in practice this
# # parameter is confounded with the sampling intensity
# mu = 0,
# # (mu_sigma) variance in mean log abundance between each location-time
# # point. Need to set "var_mu = TRUE" to be used
# mu_sigma = 0,
# # (sigma_r_sq) species heterogeneity: this is the variance in species
# # abundance distribution due to variation in log carrying capacity, in the
# # text this is referred to as $\sigma_r^2/\delta^2$
# sigma_r_sq = 2,
# # (sigma_s_sq) species specific environmental variance: this is the variance
# # in species abundance distribution due to variation between years, in the
# # text this is referred to as $\sigma_s^2/2\delta$
# sigma_s_sq = 1,
# # (sigma_d_sq) over-dispersion: notation is terrible due to the fact that
# # over-dispersion is also confounded with demographic variance, in the text
# # this is referred to as $\theta^2$, or $\sigma_d^2/2\bar{K}\delta+\theta^2$
# sigma_d_sq = 0.5,
# # (delta) strength of density regulation: the inverse mean return time to
# # equilibrium and the exponential rate of decay in the temporal dimension
# delta = 0.2,
# # (eta) inverse spatial scaling: here we assume spatial scaling is
# # exponentially decreasing
# eta = 0.3,
# # (pos_x) position of location on the x-axis: if specific locations are used
# # they need to be specified with a vector of x-coordinates, if NULL, it is
# # assumed locations are located along a diagonal line, i.e. (1,1), (2,2) etc
# pos_x = NULL,
# # (pos_x) position of location on the x-axis: if specific locations are used
# # they need to be specified with a vector of x-coordinates, if NULL, it is
# # assumed locations are located along a diagonal line, i.e. (1,1), (2,2) etc
# pos_y = NULL)
# Un-comment the list above to test the simulation function
# required packages
# library(Matrix)
# library(MASS)
# library(tidyverse)
# just load "required_packages.R"
# f_sim_sad_fast: function simulate species abundance distributions fast (the
# old version was slow:))
# input: see above
# time: "seq" = time points are a sequence, going from 1 to (n_year),
# "fixed" = if (n_year) is a vector of time points
# space: "seq" = locations are a sequence, going from (1, 1) to (n_loc, n_loc)
# "fixed" = if (pos_x) and (pos_y) are specified
# dependency: "none" = regular simulation
# "time" = used when simulation data when estimating temporal
# correlation and each location is assumed independent
# of each other
# "space" = used when simulation data when estimating spatial
# correlation and each time point is assumed independent
# of each other
# var_mu: FALSE = mean log abundance (mu) is assumed to be the same for all
# location-time points
# TRUE = mean log abundance (mu) is assumed to vary between location-
# time points with variance (mu_sigma)
f_sim_sad_fast <- function(input,
time = "seq",
space = "seq",
dependency = "none",
var_mu = FALSE){
# Checking if locations or time points are fixed or sequential ("seq")
if (time == "seq"){
s_year <- 1:input$n_year
l_year <- input$n_year
}
else{ # i.e. time = "fixed"
s_year <- input$n_year
l_year <- length(input$n_year)
}
if (space == "seq"){
pos_x <- 1:input$n_loc
pos_y <- 1:input$n_loc
s_loc <- 1:input$n_loc
l_loc <- input$n_loc
}
else{
pos_x <- input$pos_x
pos_y <- input$pos_y
s_loc <- 1:length(input$pos_x)
l_loc <- length(s_loc)
}
# spatio-temporal correlation
# The first is regular spatio-temporal correlation, the other two are used
# when simulation distributions conditioning on either space or time
if (dependency == "none"){
sigma_sim <- kronecker(
input$sigma_s_sq * exp(- input$delta * as.matrix(dist(s_year))),
exp(- input$eta * as.matrix(dist(cbind(pos_x, pos_y)))),
FUN = "*") +
input$sigma_r_sq
mu_sim <- rep(input$mu, l_year * l_loc)
}
else if (dependency == "time"){
sigma_sim <- kronecker(
input$sigma_s_sq * exp(- input$delta * as.matrix(dist(s_year))) +
input$sigma_r_sq,
diag(nrow = l_loc),
FUN = "*")
mu_sim <- rep(input$mu, l_year * l_loc)
}
else if (dependency == "space"){
sigma_sim <- kronecker(
diag(nrow = l_year),
input$sigma_s_sq * exp(- input$eta *
as.matrix(dist(cbind(pos_x, pos_y)))) +
input$sigma_r_sq,
FUN = "*")
mu_sim <- rep(input$mu, l_year * l_loc)
}
# Checking and simulating additional variation between space-time-points
if (!var_mu){
x_sim <- mvrnorm(n = input$n_sp,
mu = mu_sim,
Sigma = sigma_sim)
}
else{
x_sim <- mvrnorm(n = input$n_sp,
mu = rnorm(l_year * l_loc, mu_sim, sqrt(input$mu_sigma)),
Sigma = sigma_sim)
}
# naming columns as combinations of location and time
colnames(x_sim) <- paste(rep(s_loc, l_year),
rep(s_year, each = l_loc), sep = ".")
# naming rows as species
rownames(x_sim) <- 1:input$n_sp
# make table
x_sim_tbl <- as_tibble(x_sim)
# make species column
x_sim_tbl <- x_sim_tbl %>%
rownames_to_column(var = "species")
# organise table
x_sim_tbl <- x_sim_tbl %>%
pivot_longer(cols = -species,
names_to = "location.year",
values_to = "log_abundance")
# add over-dispersion
error <- rnorm(input$n_obs*nrow(x_sim_tbl),
mean = x_sim_tbl$log_abundance,
sd = sqrt(input$sigma_d_sq))
# re-organise table again!
x_sim_tbl <- tibble(
species = rep(x_sim_tbl$species, input$n_obs),
location.year = rep(x_sim_tbl$location.year, input$n_obs),
log_abundance = rep(x_sim_tbl$log_abundance, input$n_obs),
observer = as.character(rep(1:input$n_obs, each = nrow(x_sim_tbl))),
log_abundance_error = error)
# Generate abundances
y_sim_tbl <- x_sim_tbl %>%
mutate(abundance = rpois(nrow(x_sim_tbl),
lambda = exp(log_abundance_error)))
# associate each year and location to each other
year_loc_tbl <- tibble(year = rep(s_year, each = l_loc),
location = rep(s_loc, l_year),
location.year = paste(location, year, sep = "."))
# add year and location as variable
y_sim_tbl <- left_join(y_sim_tbl, year_loc_tbl, by = "location.year")
# create temporal variable in the format of glmmTMB
y_sim_tbl <- y_sim_tbl %>%
arrange(year) %>%
mutate(year_m = numFactor(as.numeric(year)))
# associate each location to a position
loc_tbl <- tibble(location = s_loc,
pos_x = pos_x,
pos_y = pos_y)
# add position to data table
y_sim_tbl <- left_join(y_sim_tbl, loc_tbl, by = "location")
# create spatial variable in the format of glmmTMB
y_sim_tbl <- y_sim_tbl %>%
mutate(pos_m = numFactor(pos_x, pos_y))
return(y_sim_tbl)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.