# Estimating dynamic species abundance model using the 'poilog' package
# Estimating univariate Poisson lognormal distributions ####
## f_poilog_yl: function poilog year-location ####
# for each observer/replicate at each unique year-location combination, we fit
# a univariate poisson lognormal distribtion. The mean value of the estimated
# variances are used as estimate of the total variance which is partitioned
# based on correlations that are estimated below.
f_poilog_yl <- function(data){
result <- c()
# obs: repeated measurements
for (obs in unique(data$observer)){
# y_i: unique time points
for (y_i in unique(data$year_m)){
# l_i: unique locations
for (l_i in unique(data$pos_m)){
# find the corresponding subset of the data
n_i <- data %>%
filter(year_m == y_i,
pos_m == l_i,
observer == obs) %>%
dplyr::select(species, abundance)
# fit the univariate poisson lognormal distribution
est_i <- try(poilogMLE(n = n_i$abundance))
# if the estimation worked, store the result
if (class(est_i) != "try-error"){
est_i_tbl <- tibble(mu = est_i$par[1],
lsigma_e = log(est_i$par[2]),
p0_hat = 1 - est_i$p,
e_0 = NA)
result <- bind_rows(result,
add_column(est_i_tbl,
y_i = y_i,
l_i = l_i,
obs = obs),
)
}
cat(paste(y_i, l_i, obs, sep = " / "), " \r")
}
}
}
return(result)
}
# Estimating bivariate Poisson lognormal distributions ####
## f_poilog_yll: function poilog year-location1-location2 ####
# for each observer/replicate at each unique year-location1-location2
# combination, we fit the bivariate poisson lognormal distribtion.
# The correlations from these estimates are used to determine the different
# proportions of variance, strength of density regulation and inverse spatial
# scaling
f_poilog_yll <- function(data){
result_yll <- c()
# obs: for each observer/replicate
for (obs in unique(data$observer)){
# y_i: for each unique time point
for (y_i in unique(data$year_m)){
# l_selected: all possible locations a given time point
l_selected <- unique(data$pos_m)
# l_i: for each location
for (l_i in unique(data$pos_m)){
# all remaining locations
l_selected <- l_selected[l_selected != l_i]
# data for the first community
n_i <- data %>%
filter(year_m == y_i,
pos_m == l_i,
observer == obs) %>%
dplyr::select(species, abundance)
# l_j: for each remaining location
for (l_j in l_selected){
# data for the second community
n_j <- data %>%
filter(year_m == y_i,
pos_m == l_j,
observer == obs) %>%
dplyr::select(species, abundance)
# join the two data sets together
n_ij <- full_join(n_i, n_j, by = "species",
suffix = c(".i", ".j")) %>%
mutate(abundance.i = ifelse(is.na(abundance.i), 0, abundance.i),
abundance.j = ifelse(is.na(abundance.j), 0, abundance.j))
# fit the bivariate Poisson lognormal distribution
est_ij <- try(bipoilogMLE(n1 = cbind(n_ij[, 2], n_ij[, 3])))
# if the estimation worked, store the result
if (class(est_ij) != "try-error"){
est_ij_tbl <- tibble(mu_1 = est_ij$par[1],
mu_2 = est_ij$par[2],
lsigma_e_1 = log(est_ij$par[3]),
lsigma_e_2 = log(est_ij$par[4]),
us = NA,
p0_hat = 1 - mean(est_ij$p),
e_0 = NA,
rho = est_ij$par[5])
result_yll <- bind_rows(result_yll,
add_column(est_ij_tbl,
y_i = y_i,
l_i = l_i,
l_j = l_j,
obs = obs),
)
}
cat(paste(y_i, l_i, l_j, obs, sep = " / "), " \r")
}
}
}
}
return(result_yll)
}
## f_poilog_yyll: function poilog year1-year2-location1-location2 ####
# for each observer/replicate at each unique year1-year2-location1-location2
# combination, we fit the bivariate poisson lognormal distribtion.
# The correlations from these estimates are used to determine the different
# proportions of variance, strength of density regulation and inverse spatial
# scaling
f_poilog_yyll <- function(data){
result_yyll <- c()
# obs: for each observer/replicate
for (obs in unique(data$observer)){
# y_selected: all possible time points
y_selected <- unique(data$year_m)
# y_i: for all years, except the last one, which will be included in all
# the other combinations
for (y_i in unique(data$year_m)[-n_distinct(data$year_m)]){
# all remaining years
y_selected <- y_selected[y_selected != y_i]
# l_i: for all locations
for (l_i in unique(data$pos_m)){
# first data set
n_i <- data %>%
filter(year_m == y_i,
pos_m == l_i,
observer == obs) %>%
dplyr::select(species, abundance)
# y_j: for all remaining time points
for (y_j in y_selected){
# l_j: for all remaining locations
for (l_j in unique(data$pos_m)){
# second data set
n_j <- data %>%
filter(year_m == y_j,
pos_m == l_j,
observer == obs) %>%
dplyr::select(species, abundance)
# join data
n_ij <- full_join(n_i, n_j, by = "species",
suffix = c(".i", ".j")) %>%
mutate(abundance.i = ifelse(is.na(abundance.i), 0, abundance.i),
abundance.j = ifelse(is.na(abundance.j), 0, abundance.j))
# fit the bivariate poisson lognormal distribution
est_ij <- try(bipoilogMLE(n1 = cbind(n_ij[, 2], n_ij[, 3])))
# if it worked, store the result
if (class(est_ij) != "try-error"){
est_ij_tbl <- tibble(mu_1 = est_ij$par[1],
mu_2 = est_ij$par[2],
lsigma_e_1 = log(est_ij$par[3]),
lsigma_e_2 = log(est_ij$par[4]),
us = NA,
p0_hat = 1 - mean(est_ij$p),
e_0 = NA,
rho = est_ij$par[5])
result_yyll <- bind_rows(result_yyll,
add_column(est_ij_tbl,
y_i = y_i,
y_j = y_j,
l_i = l_i,
l_j = l_j,
obs = obs),
)
}
cat(paste(y_i, y_j, l_i, l_j, obs, sep = " / "), " \r")
}
}
}
}
}
return(result_yyll)
}
## f_poilog_ylo: function poilog year-location-observer ####
# for each pair of observer/replicate at each unique year-location
# combination, we fit the bivariate poisson lognormal distribtion.
# The correlations from these estimates are used to determine the different
# proportions of variance, strength of density regulation and inverse spatial
# scaling
f_poilog_ylo <- function(data){
result_ylo <- c()
# y_i: for each time point
for (y_i in unique(data$year_m)){
# l_i: for each location
for (l_i in unique(data$pos_m)){
# o_selected: possible observers/replicates
o_selected <- unique(filter(data,
year_m == y_i,
pos_m == l_i)$observer)
# n_o: number of observers/replicates
n_o <- n_distinct(filter(data,
year_m == y_i,
pos_m == l_i)$observer)
# obs_i: for all observers, except the last one
for (obs_i in unique(filter(data,
year_m == y_i,
pos_m == l_i)$observer)[-n_o]){
# first data set
n_i <- data %>%
filter(year_m == y_i,
pos_m == l_i,
observer == obs_i) %>%
dplyr::select(species, abundance)
# remaining observers
o_selected <- o_selected[o_selected != obs_i]
# obs_j: for all remaining observers
for (obs_j in o_selected){
# second data set
n_j <- data %>%
filter(year_m == y_i,
pos_m == l_i,
observer == obs_j) %>%
dplyr::select(species, abundance)
# join data sets together
n_ij <- full_join(n_i, n_j, by = "species", suffix = c(".i", ".j")) %>%
mutate(abundance.i = ifelse(is.na(abundance.i), 0, abundance.i),
abundance.j = ifelse(is.na(abundance.j), 0, abundance.j))
# fit bivariate Poisson lognormal distribution
est_ij <- try(bipoilogMLE(n1 = cbind(n_ij[, 2], n_ij[, 3])))
# if estimation works, store the result
if (class(est_ij) != "try-error"){
est_ij_tbl <- tibble(mu_1 = est_ij$par[1],
mu_2 = est_ij$par[2],
lsigma_e_1 = log(est_ij$par[3]),
lsigma_e_2 = log(est_ij$par[4]),
us = NA,
p0_hat = 1 - mean(est_ij$p),
e_0 = NA,
rho = est_ij$par[5])
result_ylo <- bind_rows(result_ylo,
add_column(est_ij_tbl,
y_i = y_i,
l_i = l_i,
obs_i = obs_i,
obs_j = obs_j),
)
}
cat(paste(y_i, l_i, obs_i, obs_j, sep = " / "), " \r")
}
}
}
}
return(result_ylo)
}
# auxiliary function: f_distance ####
# takes all bivariate estimates (result_yll and result_yyll) and joins them
# together with their corresponding distance in space or time
f_distance <- function(data, result_yll, result_yyll){
# matrix of all time distances
time_mat <- as.matrix(dist(parseNumLevels(unique(data$year_m))))
colnames(time_mat) <- as.character(unique(data$year_m))
rownames(time_mat) <- as.character(unique(data$year_m))
# matrix of all spatial distances
dist_mat <- as.matrix(dist(parseNumLevels(unique(data$pos_m))))
colnames(dist_mat) <- as.character(unique(data$pos_m))
rownames(dist_mat) <- as.character(unique(data$pos_m))
# all unique time points
time_yiyj <- result_yyll %>%
dplyr::select(y_i, y_j) %>%
distinct()
# add time distance
time_yiyj <- time_yiyj %>%
group_by(y_i, y_j) %>%
mutate(time = time_mat[which(colnames(time_mat) == y_i),
which(rownames(time_mat) == y_j)]) %>%
ungroup()
# all unique locations
dist_lilj <- result_yll %>%
dplyr::select(l_i, l_j) %>%
distinct()
# add spatial distance
dist_lilj <- dist_lilj %>%
group_by(l_i, l_j) %>%
mutate(distance = dist_mat[which(colnames(dist_mat) == l_i),
which(rownames(dist_mat) == l_j)]) %>%
ungroup()
# add temporal and spatial distance to results
result_yll <- left_join(result_yll, dist_lilj)
result_yyll <- left_join(result_yyll, time_yiyj)
result_yyll <- left_join(result_yyll, dist_lilj)
# making sure NA distances are zero
result_yyll <- result_yyll %>%
group_by(y_i, y_j, l_i, l_j, obs) %>%
mutate(distance = ifelse(is.na(distance),
ifelse(l_i != l_j,
dist_mat[which(colnames(dist_mat) == l_j),
which(rownames(dist_mat) == l_i)],
0),
distance)) %>%
ungroup()
# making sure NA times are zero and re-organise to match other results
result_yll <- result_yll %>%
add_column(y_j = NA,
time = 0) %>%
dplyr::select(mu_1, mu_2, lsigma_e_1, lsigma_e_2, us, p0_hat, e_0, rho,
y_i, y_j, l_i, l_j, obs, time, distance)
# join results
result_all_yyll <- bind_rows(result_yll,
result_yyll)
return(result_all_yyll)
}
# total variance ####
# based on the results from the univariate Poisson lognormal distributions, we
# compute the mean log abundance and mean total variance (and standard errors)
f_tot_var <- function(result_yl){
result <- tibble(avg_mu = mean(result_yl$mu),
sd_mu = sd(result_yl$mu),
avg_sigma_sq = mean(exp(result_yl$lsigma_e)^2),
sd_sigma_sq = sd(exp(result_yl$lsigma_e)^2))
result
}
# proportion of variance due to over-dispersion ####
# if available, this function estimates 1 - the correlation at time/distance
# equal to zero, which is the proportion of the total variance attributable
# to over-dispersion
f_prop_error <- function(result_ylo){
result <- tibble(rho_0 = mean(1 - result_ylo$rho))
result
}
# correlation function ####
# with all the different correlations estimated at different spatial and
# temporal distances, we fit a exponential curve using least squares
f_rho_fun <- function(result_yyll, prop_error, given = "none"){
# all correlations and their distances
rho_data <- result_yyll %>%
dplyr::select(time, distance, rho)
# correlation function, to be used in the sum of squares
cor_f <- function(t, z, par, data, prop_error, given){
# if there is a proportion of variance due to over-dispersion, reduce the
# remaining proportion accordingly
# A is the proportion due to environmental variance
A = (1 - prop_error) * exp(par[1])/(1 + exp(par[1]))
# B is the proportion due to species heterogeneity
B = (1 - prop_error) * exp(par[2])/(1 + exp(par[2]))
# if temporal correlation is not estimated
if(given == "time"){
ct = 0
}
else{
ct = exp(par[3])
}
# if spatial correlation is not estimated
if(given == "space"){
cz = 0
}
else{
cz = exp(par[4])
}
A * exp(-ct * t -cz * z) + B
}
# sum of squares function
ss <- function(par, data, prop_error, given){
sum((data$rho - cor_f(t = data$time,
z = data$distance,
par = par,
prop_error = prop_error,
given = given))^2)
}
# numerical optimiser
est <- nlminb(start = c(1, 0, log(0.001), log(0.2)),
objective = ss,
data = rho_data,
prop_error = prop_error, given = given)
# calculate parameters
mle_A <- (1 - prop_error) * exp(est$par[1])/(1 + exp(est$par[1]))
mle_B <- (1 - prop_error) * exp(est$par[2])/(1 + exp(est$par[2]))
if(given == "time"){
mle_ct = 0
}
else{
mle_ct = exp(est$par[3])
}
if(given == "space"){
mle_cz = 0
}
else{
mle_cz = exp(est$par[4])
}
return(tibble(prop_env = mle_A,
prop_het = mle_B,
delta = mle_ct,
eta = mle_cz))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.