R/example_poilog_glmm_comparison.R

# Requires:
# source(file = "R/required_packages.R")
# source(file = "R/f_sim_sad.R")
# source(file = "R/f_glmm_est.R")
# source(file = "R/f_poilog_est.R")

# Parameters ####

input_ex01 <- list(n_sp = 100, # number of species
                   n_year = 20, # number of time points
                   n_loc = 20, # number of locations
                   n_obs = 2, # number of observers, per space-time point
                   mu = 0, # mean log abundance
                   sigma_r_sq = 2, # species heterogeneity
                   sigma_s_sq = 1, # environmental variance
                   sigma_d_sq = 0.5, # sampling variance
                   delta = 0.2, # "strength of density regulation"
                   eta = 0.2, # 1 / "spatial scaling of noise"
                   pos_x = NULL, # x-coordinates of locations
                   pos_y = NULL) # y-coordinates of locations

# Simulations ####
# Since we have already done the estimation, we only need one data set to
# compute the distances needed for the calculation

sim_ex01 <- list()

for (i in 1:1){
# for (i in 1:100){

  sim_ex01[[i]] <- f_sim_sad_fast(input_ex01)

  cat(i, "%", " \r")

}
# rm(i)

# Read results
# The estimation of the pairwise correlation is slow, so the estimates were
# computed in three parallel sessions.

est_ex01_mt <- read_rds("poilog_comparison/est_ex01_mt.rds")
est_ex01_mz <- read_rds("poilog_comparison/est_ex01_mz.rds")
est_ex01_yl <- read_rds("poilog_comparison/est_ex01_yl.rds")
est_ex01_yll <- read_rds("poilog_comparison/est_ex01_yll.rds")
est_ex01_yyll <- read_rds("poilog_comparison/est_ex01_yyll.rds")
est_ex01_ylo <- read_rds("poilog_comparison/est_ex01_ylo.rds")

est_ex02_mt <- read_rds("poilog_comparison/est_ex02_mt.rds")
est_ex02_mz <- read_rds("poilog_comparison/est_ex02_mz.rds")
est_ex02_yl <- read_rds("poilog_comparison/est_ex02_yl.rds")
est_ex02_yll <- read_rds("poilog_comparison/est_ex02_yll.rds")
est_ex02_yyll <- read_rds("poilog_comparison/est_ex02_yyll.rds")
est_ex02_ylo <- read_rds("poilog_comparison/est_ex02_ylo.rds")

est_ex03_mt <- read_rds("poilog_comparison/est_ex03_mt.rds")
est_ex03_mz <- read_rds("poilog_comparison/est_ex03_mz.rds")
est_ex03_yl <- read_rds("poilog_comparison/est_ex03_yl.rds")
est_ex03_yll <- read_rds("poilog_comparison/est_ex03_yll.rds")
est_ex03_yyll <- read_rds("poilog_comparison/est_ex03_yyll.rds")
est_ex03_ylo <- read_rds("poilog_comparison/est_ex03_ylo.rds")

# Estimation ####

# est_ex01_mt <- list()
# est_ex01_mz <- list()
# est_ex01_yl <- list()
# est_ex01_yll <- list()
# est_ex01_yyll <- list()
# est_ex01_ylo <- list()

# If you want to estimate the data sets again, uncomment this:
# for (i in 1:100){
#
#   est_ex01_mt[[i]] <- try(f_est_sad_mt(data = filter(sim_ex01[[i]],
#                                                      abundance > 0 &
#                                                        year %in% c(1, 5, 10, 15, 20) &
#                                                        pos_x %in% c(1, 5, 10, 15, 20)),
#                                      n_boot = 10,
#                                      n_rep = 3,
#                                      n_sp_mult = 1))
#
#   cat(i, "%", "mt", " \r")
#
#   est_ex01_mz[[i]] <- try(f_est_sad_mz(data = filter(sim_ex01[[i]],
#                                                      abundance > 0 &
#                                                        year %in% c(1, 5, 10, 15, 20) &
#                                                        pos_x %in% c(1, 5, 10, 15, 20)),
#                                      n_boot = 10,
#                                      n_rep = 3,
#                                      n_sp_mult = 1))
#
#   cat(i, "%", "mz", " \r")
#
#   est_ex01_yl[[i]] <- try(f_poilog_yl(data = filter(sim_ex01[[i]],
#                                                     abundance > 0 &
#                                                       year %in% c(1, 5, 10, 15, 20) &
#                                                       pos_x %in% c(1, 5, 10, 15, 20))))
#
#   cat(i, "%", "yl", " \r")
#
#   est_ex01_yll[[i]] <- try(f_poilog_yll(data = filter(sim_ex01[[i]],
#                                                       abundance > 0 &
#                                                         year %in% c(1, 5, 10, 15, 20) &
#                                                         pos_x %in% c(1, 5, 10, 15, 20))))
#
#   cat(i, "%", "yll", " \r")
#
#   est_ex01_yyll[[i]] <- try(f_poilog_yyll(data = filter(sim_ex01[[i]],
#                                                         abundance > 0 &
#                                                           year %in% c(1, 5, 10, 15, 20) &
#                                                           pos_x %in% c(1, 5, 10, 15, 20))))
#
#   cat(i, "%", "yyll", " \r")
#
#   est_ex01_ylo[[i]] <- try(f_poilog_ylo(data = filter(sim_ex01[[i]],
#                                                       abundance > 0 &
#                                                         year %in% c(1, 5, 10, 15, 20) &
#                                                         pos_x %in% c(1, 5, 10, 15, 20))))
#
#   cat(i, "%", "ylo", " \r")
#
# }

# write_rds(est_ex01_mt, "poilog_comparison/est_ex03_mt.rds")
# write_rds(est_ex01_mz, "poilog_comparison/est_ex03_mz.rds")
# write_rds(est_ex01_yl, "poilog_comparison/est_ex03_yl.rds")
# write_rds(est_ex01_yll, "poilog_comparison/est_ex03_yll.rds")
# write_rds(est_ex01_yyll, "poilog_comparison/est_ex03_yyll.rds")
# write_rds(est_ex01_ylo, "poilog_comparison/est_ex03_ylo.rds")

# Join results together ####
# zero-adjusted glmm:
est_glmm_mt <- c()
est_glmm_mz <- c()
for (i in c(1:60)){
  if (is_tibble(est_ex01_mt[[i]])){
    est_glmm_mt <- bind_rows(est_glmm_mt,
                             add_column(est_ex01_mt[[i]],
                                        replicate = i,
                                        iteration = 0:10))
  }
  if (is_tibble(est_ex01_mz[[i]])){
    est_glmm_mz <- bind_rows(est_glmm_mz,
                             add_column(est_ex01_mz[[i]],
                                        replicate = i,
                                        iteration = 0:10))
  }
}
for (i in c(1:25)){
  if (is_tibble(est_ex02_mt[[i]])){
    est_glmm_mt <- bind_rows(est_glmm_mt,
                             add_column(est_ex02_mt[[i]],
                                        replicate = i + 60,
                                        iteration = 0:10))
  }
  if (is_tibble(est_ex02_mz[[i]])){
    est_glmm_mz <- bind_rows(est_glmm_mz,
                             add_column(est_ex02_mz[[i]],
                                        replicate = i + 60,
                                        iteration = 0:10))
  }
}
for (i in c(1:22)){
  if (is_tibble(est_ex03_mt[[i]])){
    est_glmm_mt <- bind_rows(est_glmm_mt,
                             add_column(est_ex03_mt[[i]],
                                        replicate = i + 85,
                                        iteration = 0:10))
  }
  if (is_tibble(est_ex03_mz[[i]])){
    est_glmm_mz <- bind_rows(est_glmm_mz,
                             add_column(est_ex03_mz[[i]],
                                        replicate = i + 85,
                                        iteration = 0:10))
  }
}

# poilog estimates:
# add distances:
est_poilog <- list()
for (i in 1:60){
  est_poilog[[i]] <- f_distance(data = filter(sim_ex01[[1]],
                                              abundance > 0 &
                                                year %in% c(1, 5, 10, 15, 20) &
                                                pos_x %in% c(1, 5, 10, 15, 20)),
                                result_yll = est_ex01_yll[[i]],
                                result_yyll = est_ex01_yyll[[i]])
}
for (i in 1:25){
  est_poilog[[i + 60]] <- f_distance(data = filter(sim_ex01[[1]],
                                              abundance > 0 &
                                                year %in% c(1, 5, 10, 15, 20) &
                                                pos_x %in% c(1, 5, 10, 15, 20)),
                                result_yll = est_ex02_yll[[i]],
                                result_yyll = est_ex02_yyll[[i]])
}
for (i in 1:22){
  est_poilog[[i + 85]] <- f_distance(data = filter(sim_ex01[[1]],
                                              abundance > 0 &
                                                year %in% c(1, 5, 10, 15, 20) &
                                                pos_x %in% c(1, 5, 10, 15, 20)),
                                result_yll = est_ex03_yll[[i]],
                                result_yyll = est_ex03_yyll[[i]])
}

# total variance:
tot_var_poilog <- c()
for (i in 1:60){
  tot_var_poilog <- bind_rows(tot_var_poilog,
                              add_column(f_tot_var(est_ex01_yl[[i]]),
                                         replicate = i))
}
for (i in 1:25){
  tot_var_poilog <- bind_rows(tot_var_poilog,
                              add_column(f_tot_var(est_ex02_yl[[i]]),
                                         replicate = i + 60))
}
for (i in 1:22){
  tot_var_poilog <- bind_rows(tot_var_poilog,
                              add_column(f_tot_var(est_ex03_yl[[i]]),
                                         replicate = i + 85))
}

# correlation at time/distance zero, i.e. over-dispersion
rho0_poilog <- c()
for (i in 1:60){
  rho0_poilog <- bind_rows(rho0_poilog,
                           add_column(f_prop_error(est_ex01_ylo[[i]]),
                                      replicate = i))
}
for (i in 1:25){
  rho0_poilog <- bind_rows(rho0_poilog,
                           add_column(f_prop_error(est_ex02_ylo[[i]]),
                                      replicate = i + 60))
}
for (i in 1:22){
  rho0_poilog <- bind_rows(rho0_poilog,
                           add_column(f_prop_error(est_ex03_ylo[[i]]),
                                      replicate = i + 85))
}

# join all estimates together:
rho_res_poilog <- c()
for (i in 1:107){
  rho_res_poilog <- bind_rows(rho_res_poilog,
                              add_column(f_rho_fun(result_yyll = est_poilog[[i]],
                                                   prop_error = rho0_poilog$rho_0[i]),
                                         replicate = i))
}

# Computing the different variance components for the poilog model:
poilog_mle_est <- left_join(left_join(tot_var_poilog,
                                      rho0_poilog),
                            rho_res_poilog) %>%
  mutate(rho_sum = rho_0 + prop_env + prop_het) %>%
  mutate(sigma_e_sq = avg_sigma_sq * prop_env,
         sigma_r_sq = avg_sigma_sq * prop_het,
         sigma_d_sq = avg_sigma_sq * rho_0) %>%
  dplyr::select(replicate,
                avg_mu,
                sigma_e_sq,
                delta,
                eta,
                sigma_r_sq,
                sigma_d_sq)

# Transforming variances to compare with the zero-adjusted glmm:
poilog_mle_lest <- poilog_mle_est %>%
  mutate(lsigma_e = log(sqrt(sigma_e_sq)),
         lsigma_r = log(sqrt(sigma_r_sq)),
         lsigma_d = log(sqrt(sigma_d_sq)),
         ldelta = log(delta),
         letainv = log(1 / eta)) %>%
  rename(mu = avg_mu) %>%
  pivot_longer(cols = c(mu, lsigma_e:letainv))

glmm_data <- bind_rows(add_column(est_glmm_mt,
                                  dimension = "time"),
                       add_column(est_glmm_mz,
                                  dimension = "space")) %>%
  pivot_longer(cols = c(mu:lsigma_d, letainv))

# Histograms of estimates
bind_rows(dplyr::select(mutate(dplyr::select(filter(glmm_data,
                            iteration == 10),
                            name, value, dimension),
                     method = paste("zero-adjusted",
                                    dimension, sep = ", ")),
                 -dimension),
          add_column(dplyr::select(poilog_mle_lest,
                                   name, value),
                     method = "poilog")) %>%
  # Take out some of the most extreme values from the poilog estimates for
  # better visibility
  filter(!(value < 0 & name == "letainv") &
           !(value > 4 & name == "letainv") &
           !(value > 1 & name == "lsigma_r") &
           !(value < -1 & name == "lsigma_r") &
           !(value < -5 & name == "ldelta")) %>%
  ggplot(aes(x = value)) +
  geom_histogram(bins = 12,
                 aes(fill = method),
                 colour = "black",
                 position = "dodge") +
  # True values
  geom_vline(data = tibble(value = c(log(sqrt(1)),
                                     log(sqrt(2)),
                                     log(sqrt(0.5)),
                                     log(0.2),
                                     log(1/0.2),
                                     0),
                           name = c("lsigma_e",
                                    "lsigma_r",
                                    "lsigma_d",
                                    "ldelta",
                                    "letainv",
                                    "mu")),
             aes(xintercept = value)) +
  facet_wrap(~ name, scales = "free_x") +
  scale_fill_manual("Model",
                    values = c("zero-adjusted, time" = "dark grey",
                               "zero-adjusted, space" = "light grey",
                               "poilog" = "white")) +
  labs(x = "Parameter estimate", y = "Count") +
  theme_bw()

# ggsave("glmm_poilog_comparison_v2.pdf", width = 8, height = 5)
erikbsolbu/dynamicSAD documentation built on Jan. 25, 2021, 5 a.m.