# Different versions of the zero-adjusted algorithm
# Used in the simulation example of different assumptions ####
# f_est_sad_mt: temporal correlation, species heterogeneity, over-dispersion
# f_est_sad_mt_nh: temporal correlation, NO heterogeneity, over-dispersion
# f_est_sad_mt_nc: NO correlation, species heterogeneity, over-dispersion
# Data is assumed to be a tibble (or data.frame) with the following columns:
# species: identity of species (numer or name)
# abundance: abundance (integer) of the species
# year_m: numFactor() of the temporal component, e.g. year
# pos_m: numFactor() of the spatial component, i.e. x and y positions
# observer: a variable that makes each observation unique, e.g. row number,
# could also be unique for the specific species-time-location combo
# In addition to 'data' the functions takes the following input:
# n_sp: the number of species to simulate each iteration
# n_boot: the number of iterations in each run of the algorithm
# n_rep: if you want to repeat each iteration step multiple times
# for the same run of the algorithm and take the mean of those values
# Temporal correlation and species heterogeneity ####
f_est_sad_mt <- function(data, n_sp = 1, n_boot = 10, n_rep = 1){
# the truncated_poisson model takes only positive values
data <- data %>%
filter(abundance > 0)
st_est_glmmTMB_01 <- glmmTMB(
# abundance is described by
abundance ~
# temporal correlation: exponential decay between zero and one,
# i.e. an Ornstein-Uhlenbeck process
ou(year_m + 0 | species:pos_m) +
# species heterogeneity
(1 | species:pos_m) +
# over-dispersion (sampling error)
(1 | species:year_m:pos_m:observer),
data = data,
family = truncated_poisson(link = "log"))
# Store the parameter estimates and convergence information
st_est_adj_01 <- c()
st_est_adj_01 <- rbind(st_est_adj_01,
c(st_est_glmmTMB_01$fit$par,
st_est_glmmTMB_01$fit$convergence))
# k: the number of iterations
for (k in 1:n_boot){
# storage for intermediate estimates of simulations
st_boot_tmp_01 <- c()
# input: parameter values for the simulation
input <- list(
# number of species
n_sp = n_sp,
# number of time points
n_year = unique(data$year)[order(unique(data$year))],
# number of locations
n_loc = n_distinct(data$pos_m),
# number of locations
n_obs = n_distinct(data$observer),
# mean log abundance
mu = st_est_adj_01[k, 1],
# variance of mean log abundance
mu_sigma = 0,
# species heterogeneity
sigma_r_sq = exp(st_est_adj_01[k, 4])^2,
# environmental variance
sigma_s_sq = exp(st_est_adj_01[k, 2])^2,
# sampling variance
sigma_d_sq = exp(st_est_adj_01[k, 5])^2,
# strength of density regulation
delta = exp(st_est_adj_01[k, 3]),
# inverse spatial scaling
eta = 0,
# x coordinates
pos_x = NULL,
# y coordinates
pos_y = NULL)
# i: each iteration step can be repeated multiple times time get an average
# value of the parameter estimates at the current iteration
for (i in 1:n_rep){
data_boot_01 <- f_sim_sad_fast(input,
# we use the specific time points of the
# data set
time = "fixed",
# we are only interested in time, so we
# simulate with independent locations
dependency = "time")
# estimate the temporal correlation
st_est_boot_01 <- glmmTMB(abundance ~ ou(year_m + 0 | species:pos_m) +
(1 | species:pos_m) +
(1 | species:year_m:pos_m:observer),
data = filter(data_boot_01, abundance > 0),
family = truncated_poisson(link = "log"))
# store all intermediate estimates
st_boot_tmp_01 <- rbind(st_boot_tmp_01,
c(st_est_boot_01$fit$par,
st_est_boot_01$fit$convergence))
}
# adjust the estimate by the (average of the) current iteration
st_est_boot_adj_01 <- st_est_adj_01[k, ] + st_est_adj_01[1, ] -
apply(st_boot_tmp_01, 2, mean)
cat(paste("Fitting model, iteration step", k, sep = " "), " \r")
# add the new estimate to the matrix of iteration steps
st_est_adj_01 <- rbind(st_est_adj_01,
st_est_boot_adj_01)
}
# naming the parameter estimates
rownames(st_est_adj_01) <- NULL
colnames(st_est_adj_01) <- c("mu",
"lsigma_e",
"ldelta",
"lsigma_r",
"lsigma_d",
"convergence")
return(as_tibble(st_est_adj_01))
}
# No heterogeneity ####
# See the function f_est_sad_mt above for explanation of the code
f_est_sad_mt_nh <- function(data, n_sp = 1, n_boot = 10, n_rep = 1){
data <- data %>%
filter(abundance > 0)
st_est_glmmTMB_01 <- glmmTMB(
abundance ~ ou(year_m + 0 | species:pos_m) +
(1 | species:year_m:pos_m:observer),
data = data,
family = truncated_poisson(link = "log"))
st_est_adj_01 <- c()
st_est_adj_01 <- rbind(st_est_adj_01,
c(st_est_glmmTMB_01$fit$par,
st_est_glmmTMB_01$fit$convergence))
for (k in 1:n_boot){
st_boot_tmp_01 <- c()
input <- list(n_sp = n_sp,
n_year = unique(data$year)[order(unique(data$year))],
n_loc = n_distinct(data$pos_m),
n_obs = n_distinct(data$observer),
mu = st_est_adj_01[k, 1],
mu_sigma = 0,
sigma_r_sq = 0,
sigma_s_sq = exp(st_est_adj_01[k, 2])^2,
sigma_d_sq = exp(st_est_adj_01[k, 4])^2,
delta = exp(st_est_adj_01[k, 3]),
eta = 0,
pos_x = NULL,
pos_y = NULL)
for (i in 1:n_rep){
data_boot_01 <- f_sim_sad_fast(input,
time = "fixed",
dependency = "time")
st_est_boot_01 <- glmmTMB(abundance ~ ou(year_m + 0 | species:pos_m) +
(1 | species:year_m:pos_m:observer),
data = filter(data_boot_01, abundance > 0),
family = truncated_poisson(link = "log"))
st_boot_tmp_01 <- rbind(st_boot_tmp_01,
c(st_est_boot_01$fit$par,
st_est_boot_01$fit$convergence))
}
st_est_boot_adj_01 <- st_est_adj_01[k, ] + st_est_adj_01[1, ] -
apply(st_boot_tmp_01, 2, mean)
cat(paste("Fitting model, iteration step", k, sep = " "), " \r")
st_est_adj_01 <- rbind(st_est_adj_01,
st_est_boot_adj_01)
}
rownames(st_est_adj_01) <- NULL
colnames(st_est_adj_01) <- c("mu",
"lsigma_e",
"ldelta",
"lsigma_d",
"convergence")
return(as_tibble(st_est_adj_01))
}
# No temporal correlation ####
# See the function f_est_sad_mt above for explanation of the code
f_est_sad_mt_nc <- function(data, n_sp =1, n_boot = 10, n_rep = 1){
data <- data %>%
filter(abundance > 0)
st_est_glmmTMB_01 <- glmmTMB(
abundance ~ (1 | species:year_m:pos_m) +
(1 | species:pos_m) +
(1 | species:year_m:pos_m:observer),
data = data,
family = truncated_poisson(link = "log"))
st_est_adj_01 <- c()
st_est_adj_01 <- rbind(st_est_adj_01,
c(st_est_glmmTMB_01$fit$par,
st_est_glmmTMB_01$fit$convergence))
for (k in 1:n_boot){
st_boot_tmp_01 <- c()
input <- list(n_sp = n_sp,
n_year = unique(data$year)[order(unique(data$year))],
n_loc = n_distinct(data$pos_m),
n_obs = n_distinct(data$observer),
mu = st_est_adj_01[k, 1],
mu_sigma = 0,
sigma_r_sq = exp(st_est_adj_01[k, 3])^2,
sigma_s_sq = exp(st_est_adj_01[k, 2])^2,
sigma_d_sq = exp(st_est_adj_01[k, 4])^2,
# delta = 1000, short-cut to turn correlation to zero
delta = 1000,
eta = 0,
pos_x = NULL,
pos_y = NULL)
for (i in 1:n_rep){
data_boot_01 <- f_sim_sad_fast(input,
time = "fixed",
dependency = "time")
st_est_boot_01 <- glmmTMB(abundance ~ (1 | species:year_m:pos_m) +
(1 | species:pos_m) +
(1 | species:year_m:pos_m:observer),
data = filter(data_boot_01, abundance > 0),
family = truncated_poisson(link = "log"))
st_boot_tmp_01 <- rbind(st_boot_tmp_01,
c(st_est_boot_01$fit$par,
st_est_boot_01$fit$convergence))
}
st_est_boot_adj_01 <- st_est_adj_01[k, ] + st_est_adj_01[1, ] -
apply(st_boot_tmp_01, 2, mean)
cat(paste("Fitting model, iteration step", k, sep = " "), " \r")
st_est_adj_01 <- rbind(st_est_adj_01,
st_est_boot_adj_01)
}
rownames(st_est_adj_01) <- NULL
colnames(st_est_adj_01) <- c("mu",
"lsigma_e",
"lsigma_r",
"lsigma_d",
"convergence")
return(as_tibble(st_est_adj_01))
}
# Spatial correlation and species heterogeneity ####
f_est_sad_mz <- function(data, n_boot = 20, n_rep = 1, n_sp = 1){
data <- data %>%
filter(abundance > 0)
# estimate the temporal correlation
st_est_glmmTMB_01 <- glmmTMB(abundance ~ exp(pos_m + 0 | species:year_m) +
(1 | species:year_m) +
(1 | species:year_m:pos_m:observer),
data = data,
family = truncated_poisson(link = "log"))
st_est_adj_01 <- c()
st_est_adj_01 <- rbind(st_est_adj_01,
c(st_est_glmmTMB_01$fit$par,
st_est_glmmTMB_01$fit$convergence))
for (k in 1:n_boot){
st_boot_tmp_01 <- c()
input <- list(n_sp = n_distinct(data$species) * n_sp, # number of species
n_year = n_distinct(data$year), # number of time points
n_loc = n_distinct(data$pos_m), # number of locations
n_obs = n_distinct(data$observer), # number of locations
mu = st_est_adj_01[k, 1], # mean log abundance
sigma_r_sq = exp(st_est_adj_01[k, 4])^2, # species heterogeneity
sigma_s_sq = exp(st_est_adj_01[k, 2])^2, # environmental variance
sigma_d_sq = exp(st_est_adj_01[k, 5])^2, # sampling variance
delta = 0, # "strength of density regulation"
eta = 1 / exp(st_est_adj_01[k, 3]),
pos_x = unique(data$pos_x)[order(unique(data$pos_x))],
pos_y = unique(data$pos_y)[order(unique(data$pos_y))]) # 1 / "spatial scaling of noise"
for (i in 1:n_rep){
data_boot_01 <- f_sim_sad_fast(input, space = "fixed", dependency = "space")
# estimate the temporal correlation
st_est_boot_01 <- glmmTMB(abundance ~ exp(pos_m + 0 | species:year_m) +
(1 | species:year_m) +
(1 | species:year_m:pos_m:observer),
data = filter(data_boot_01, abundance > 0),
family = truncated_poisson(link = "log"))
st_boot_tmp_01 <- rbind(st_boot_tmp_01,
c(st_est_boot_01$fit$par, st_est_boot_01$fit$convergence))
}
st_est_boot_adj_01 <- st_est_adj_01[k, ] + st_est_adj_01[1, ] - apply(st_boot_tmp_01, 2, mean)
cat(paste("Fitting model", case, "iteration", it, "of 100, boot correction ", k, sep = " "), " \r")
st_est_adj_01 <- rbind(st_est_adj_01,
st_est_boot_adj_01)
}
rownames(st_est_adj_01) <- NULL
colnames(st_est_adj_01) <- c("mu", "lsigma_e", "letainv", "lsigma_r", "lsigma_d", "convergence")
return(as_tibble(st_est_adj_01))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.