# Illustration of the zero-truncation from the poilog package
# Note: it is not efficient to use the zero-adjusted algorithm on a single
# univariate or bivariate distribution, it is much better to use the poilog
# package in these cases. The benefit of the zero-adjusted algorithm is for
# multivariate cases.
# Univariate case ####
# Get the same result, i.e. figure, every time.
set.seed(1)
# Generate the (true) log abundances
x <- rnorm(100, 1, 2)
# Generate the abundances
n <- rpois(100, lambda = exp(x))
# Make a table for the data
data_tbl <- tibble(x = x,
n = n,
z = n > 0)
# Fit the non-truncated Poisson lognormal distribution using 'poilog'
# (dotted line in Figure 1A)
m.fit <- poilogMLE(n = n[n>0], zTrunc = FALSE)
# Fit the zero-truncated Poisson lognormal distribution using 'poilog'
# (dashed line in Figure 1A)
mzt.fit <- poilogMLE(n = n[n>0], zTrunc = TRUE)
# Add species variable for analysing in glmmTMB
data_tbl <- data_tbl %>%
add_column(species = 1:100)
# tmbzt.fit = "(glmm) template model builder zero truncated fit"
tmbzt.fit <- glmmTMB(n ~ (1|species),
family = truncated_poisson(link = "log"),
data = filter(data_tbl, z))
# k_boot: the number of species simulated, the higher the number, the
# more accurate the zero adjustment will be
k_boot <- 200
# res_cor: a vector to store the last iteration step for each replicate
# run of the zero-adjustment
res_cor <- c()
# m: we run the zero-adjusted algorithm 20 times (as mentioned above, this is
# not an efficient approach in one dimension)
for (m in 1:20){
# res: here we store the estimates at each iteration step
res <- c()
# res[1]: the first value is the original estimate from glmmTMB
res <- rbind(res, tmbzt.fit$fit$par)
# i: we do 10 iteration steps, usually its usefull to run a few replicates
# with higher number of iterations and determine how long they have to be
for (i in 1:10){
# simulate new abundances from a non-truncated poisson lognormal
# distribution given the current adjusted values from the zero-truncated
# GLMM estimates. keep0 = FALSE as we don't need those in the estimation
n_boot <- rpoilog(k_boot, res[i, 1], exp(res[i, 2]), keep0 = FALSE)
# Fit a new zero-truncated GLMM to the simulated data
tmbzt.boot.fit <- glmmTMB(n ~ (1|species),
family = truncated_poisson(link = "log"),
data = tibble(n = n_boot,
species = 1:length(n_boot)))
# adjust the zero-truncated estimates
tmp_res <- res[i, ] + res[1, ] - tmbzt.boot.fit$fit$par
# add the result to the current replicate
res <- rbind(res, tmp_res)
}
# store the final value of the replicate
res_cor <- rbind(res_cor, res[11, ])
}
# for the collection of (20) replicates, we compute the mean as the zero-
# adjusted estimate
tmbzt.adj.fit <- apply(res_cor, 2, mean)
# Compare estimates, note the similarity between the 2nd and 4th estimates
m.fit$par # non-truncated 'poilog'
mzt.fit$par # zero-truncated 'poilog'
c(tmbzt.fit$fit$par[1], exp(tmbzt.fit$fit$par[2])) # zero-truncated glmm
c(tmbzt.adj.fit[1], exp(tmbzt.adj.fit[2])) # zero-adjusted glmm
# Make a figure of the data, marking observed (n > 0) abundances as grey
# and add all four estimated curves. In the background there is a shade of
# the theoretical (true) distribution
fig1a <- data_tbl %>%
ggplot(aes(x = x)) +
# theoretical distribution
geom_area(data = tibble(x = seq(-5, 8, length.out = 100),
y = dnorm(x, 1, 2) * 100),
aes(x = x, y = y), alpha = 0.15) +
# abundances
geom_histogram(aes(x = x, fill = as.factor(z)),
bins = 10, colour = "black", alpha = 0.8) +
# non-truncated (poisson) lognormal distribution
stat_function(fun = function(x, mean, sd){100*dnorm(x, mean, sd)},
args = list(mean = m.fit$par[1],
sd = m.fit$par[2]),
linetype = 3, size = 1) +
# zero-truncated (poisson) lognormal distribution
stat_function(fun = function(x, mean, sd){100*dnorm(x, mean, sd)},
args = list(mean = mzt.fit$par[1],
sd = mzt.fit$par[2]),
linetype = 2, size = 1) +
# zero-truncated (glmm) lognormal distribution
stat_function(fun = function(x, mean, sd){100*dnorm(x, mean, sd)},
args = list(mean = tmbzt.fit$fit$par[1],
sd = exp(tmbzt.fit$fit$par[2])),
linetype = 1, size = 1) +
# zero-adjusted (glmm) lognormal distribution
stat_function(fun = function(x, mean, sd){100*dnorm(x, mean, sd)},
args = list(mean = tmbzt.adj.fit[1],
sd = exp(tmbzt.adj.fit[2])),
linetype = 1, size = 1) +
# aesthetics
theme_bw() +
scale_fill_manual(values = c("white", "grey")) +
labs(x = "Log abundance", y = "Observerd frequency") +
coord_cartesian(xlim = c(-4, 7)) +
guides(fill = "none")
# Bivariate case ####
# Get the same result, i.e. figure, every time.
set.seed(1)
# The total number of species
n_sp <- 100
# Generate the (true) log abundances
x2 <- mvrnorm(n = n_sp,
mu = c(1, 1),
Sigma = matrix(data = c(2, 1.5,
1.5, 2), nrow = 2))
# Generate the abundances
n2 <- matrix(data = rpois(n_sp * 2, lambda = exp(x2)), ncol = 2)
# Make a table for the data
data2_tbl <- tibble(x = c(x2),
n = c(n2),
z = n > 0,
species = rep(1:n_sp, 2),
set = rep(1:2, each = n_sp))
# Fit the bivariate non-truncated Poisson lognormal distribution using 'poilog'
# (dotted ellipse in Figure 1B)
m2.fit <- bipoilogMLE(n1 = n2[-which(n2[,1]==0 & n2[, 2]==0), ],
zTrunc = FALSE)
# Fit the bivariate zero-truncated Poisson lognormal distribution using
# 'poilog' (dashed ellipse in Figure 1B)
m2zt.fit <- bipoilogMLE(n1 = n2[-which(n2[,1]==0 & n2[, 2]==0), ],
zTrunc = TRUE)
# tmb2zt.fit = "(glmm) template model builder bi(2) variate zero truncated fit"
tmb2zt.fit <- glmmTMB(n ~ (1|species:set) +
(1|species),
family = truncated_poisson(link = "log"),
data = filter(data2_tbl, z))
# k_boot: the number of species simulated, the higher the number, the
# more accurate the zero adjustment will be
k_boot <- 200
# res_cor2: a vector to store the last iteration step for each replicate
# run of the zero-adjustment
res_cor2 <- c()
# m: we run the zero-adjusted algorithm 20 times (as mentioned above, this is
# not an efficient approach in two dimensions)
for (m in 1:20){
# res2: here we store the estimates at each iteration step
res2 <- c()
# res2[1]: the first value is the original estimate from glmmTMB
res2 <- rbind(res2, tmb2zt.fit$fit$par)
# i: we do 10 iteration steps, usually its usefull to run a few replicates
# with higher number of iterations and determine how long they have to be
for (i in 1:10){
# simulate new abundances from a non-truncated bivariate Poisson lognormal
# distribution given the current adjusted values from the zero-truncated
# GLMM estimates. First the (underlying) log abundances
x2_boot <- mvrnorm(n = k_boot,
mu = rep(res2[i, 1], 2),
Sigma = matrix(
data = exp(res2[i, 3])^2 +
c(exp(res2[i, 2])^2, 0,
0, exp(res2[i, 2])^2),
nrow = 2))
# Generate the abundances
n2_boot <- matrix(data = rpois(k_boot*2, lambda = exp(x2_boot)), ncol = 2)
# Make a table
data2boot_tbl <- tibble(x = c(x2_boot),
n = c(n2_boot),
z = n > 0,
species = rep(1:k_boot, 2),
set = rep(1:2, each = k_boot))
# Fit a new zero-truncated GLMM to the simulated data
tmb2zt.boot.fit <- glmmTMB(n ~ (1|species:set) +
(1|species),
family = truncated_poisson(link = "log"),
data = filter(data2boot_tbl, z))
# adjust the zero-truncated estimates
tmp2_res <- res2[i, ] + res2[1, ] - tmb2zt.boot.fit$fit$par
# add the result to the current replicate
res2 <- rbind(res2, tmp2_res)
}
# store the final value of the replicate
res_cor2 <- rbind(res_cor2, res2[11, ])
}
# for the collection of (20) replicates, we compute the mean as the zero-
# adjusted estimate
tmb2zt.adj.fit <- apply(res_cor2, 2, mean)
# Compare estimates, note the similarity between the 2nd and 4th estimates
m2.fit$par # non-truncated 'poilog'
m2zt.fit$par # zero-truncated 'poilog'
c(tmb2zt.fit$fit$par[c(1, 1)],
rep(sqrt(sum(exp(tmb2zt.fit$fit$par[c(2:3)])^2)), 2),
exp(tmb2zt.fit$fit$par[3]) ^ 2 /
sum(exp(tmb2zt.fit$fit$par[c(2:3)])^2)) # zero-truncated glmm
c(tmb2zt.adj.fit[c(1, 1)],
rep(sqrt(sum(exp(tmb2zt.adj.fit[c(2:3)])^2)), 2),
exp(tmb2zt.adj.fit[3]) ^ 2 /
sum(exp(tmb2zt.adj.fit[c(2:3)])^2)) # zero-adjusted glmm
# To plot the contours, see:
# https://stackoverflow.com/questions/36221596/plot-multivariate-gaussian-contours-with-ggplot2
library(plyr) # only for this example!
# Confidence level
alpha_levels <- 0.95
names(alpha_levels) <- alpha_levels
# True parameter values
contour_data <- ldply(alpha_levels,
ellipse,
x = matrix(c(1, 1.5/2,
1.5/2, 1), nrow=2),
scale = rep(sqrt(2), 2),
centre = rep(1, 2))
# non-truncated Poisson lognormal
contour_data_m2 <- ldply(alpha_levels,
ellipse,
x = matrix(c(1, m2.fit$par[5],
m2.fit$par[5], 1), nrow=2),
scale = m2.fit$par[3:4],
centre = m2.fit$par[1:2])
# zero-truncated Poisson lognormal
contour_data_m2zt <- ldply(alpha_levels,
ellipse,
x = matrix(c(1, m2zt.fit$par[5],
m2zt.fit$par[5], 1), nrow=2),
scale = m2zt.fit$par[3:4],
centre = m2zt.fit$par[1:2])
# zero-truncated GLMM
contour_data_tmb2zt <- ldply(alpha_levels,
ellipse,
x = matrix(data = exp(tmb2zt.fit$fit$par[3])^2 +
c(exp(tmb2zt.fit$fit$par[2])^2, 0,
0, exp(tmb2zt.fit$fit$par[2])^2),
nrow = 2) /
(exp(tmb2zt.fit$fit$par[3])^2 +
exp(tmb2zt.fit$fit$par[2])^2),
scale = rep(
sqrt(sum(exp(tmb2zt.fit$fit$par[2:3])^2)),
2),
centre = rep(tmb2zt.fit$fit$par[1], 2))
# zero-adjusted GLMM
contour_data_tmb2zt_adj <- ldply(alpha_levels,
ellipse,
x = matrix(data = exp(tmb2zt.adj.fit[3])^2 +
c(exp(tmb2zt.adj.fit[2])^2, 0,
0, exp(tmb2zt.adj.fit[2])^2),
nrow = 2) /
(exp(tmb2zt.adj.fit[3])^2 +
exp(tmb2zt.adj.fit[2])^2),
scale = rep(
sqrt(sum(exp(tmb2zt.adj.fit[2:3])^2)),
2),
centre = rep(tmb2zt.adj.fit[1], 2))
# Make a figure of the data, marking observed (n > 0) abundances in the first
# population as grey and observations in the second populatoin as triangles.
# Add all four estimated ellipses. In the background there is a shade of
# the theoretical (true) distribution
fig1b <- data2_tbl %>%
pivot_wider(names_from = set, values_from = x:z) %>%
ggplot(aes(x = x_1, y = x_2, fill = as.factor(z_1))) +
# True distribution
geom_polygon(data = contour_data, aes(x, y, group=.id, fill = NULL), alpha = 0.15) +
# non-truncated Poisson lognormal
geom_path(data = contour_data_m2, aes(x, y, group=.id, fill = NULL), linetype = 3, size = 1) +
# zero-truncated Poisson lognormal
geom_path(data = contour_data_m2zt, aes(x, y, group=.id, fill = NULL), linetype = 2, size = 1) +
# zero-truncated GLMM
geom_path(data = contour_data_tmb2zt, aes(x, y, group=.id, fill = NULL), size = 1) +
# zero-adjusted GLMM
geom_path(data = contour_data_tmb2zt_adj, aes(x, y, group=.id, fill = NULL), size = 1) +
# Observations
geom_point(aes(shape = as.factor(z_2)), size = 5, alpha = 0.8) +
# aesthetics
scale_shape_manual(values = c(21, 24)) +
scale_fill_manual(values = c("white", "grey")) +
theme_bw() +
labs(x = "Log abundance in population 1",
y = "Log abundance in population 2") +
guides(shape = "none",
fill = "none")
# Unload package
detach("package:plyr", unload = TRUE)
# Plot both figures in one go!
plot_grid(fig1a, fig1b, labels = c('A', 'B'))
# ggsave("zero_truncation_illustration.pdf", width = 8, height = 5)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.