dynamicSAD:

# Case studies

# Fish data ####

# temporal correlation and species heterogeneity and different means ####

f_est_sad_mt_cs_dm <- function(data, n_boot = 10, n_rep = 1, n_sp_mult = 1){

  data <- data %>%
    filter(abundance > 0)

  # estimate the temporal correlation
  st_est_glmmTMB_01 <- glmmTMB(abundance ~ ou(year_m + 0 | species:pos_m) +
                                 (1 | species:pos_m) +
                                 (1 | year_m:pos_m),
                               data = data,
                               family = truncated_poisson(link = "log"))

  st_est_adj_01 <- c()
  st_est_adj_01 <- rbind(st_est_adj_01,
                         c(st_est_glmmTMB_01$fit$par,
                           st_est_glmmTMB_01$fit$convergence))

  n_mu <- n_distinct(interaction(data$year_m, data$pos_m))

  for (k in 1:n_boot){

    st_boot_tmp_01 <- c()

    input_01 <- list(n_sp = n_distinct(data$species) * n_sp_mult, # number of species
                     n_year =  unique(parseNumLevels(data$year_m))[order(unique(parseNumLevels(data$year_m)))], # number of time points
                     n_loc = n_distinct(data$pos_m), # number of locations
                     n_obs = n_distinct(data$observer), # number of locations
                     mu = st_est_adj_01[k, 1], # mean log abundance
                     mu_sigma = exp(st_est_adj_01[k, 5])^2, # mean log abundance
                     sigma_r_sq = exp(st_est_adj_01[k, 4])^2, # species heterogeneity
                     sigma_s_sq = exp(st_est_adj_01[k, 2])^2, # environmental variance
                     sigma_d_sq = 0, # sampling variance
                     delta = exp(st_est_adj_01[k, 3]), # "strength of density regulation"
                     eta = 0,
                     pos_x = NULL,
                     pos_y = NULL) # 1 / "spatial scaling of noise"

    for (i in 1:n_rep){

      data_boot_01 <- f_sim_sad_fast(input_01, time = "fixed", dependency = "time", var_mu = TRUE)

      # estimate the temporal correlation
      st_est_boot_01 <- glmmTMB(abundance ~ ou(year_m + 0 | species:pos_m) +
                                  (1 | species:pos_m) +
                                  (1 | year_m:pos_m),
                                data = filter(data_boot_01, abundance > 0),
                                family = truncated_poisson(link = "log"))


      st_boot_tmp_01 <- rbind(st_boot_tmp_01,
                              c(st_est_boot_01$fit$par, st_est_boot_01$fit$convergence))

    }

    st_est_boot_adj_01 <- st_est_adj_01[k, ] + st_est_adj_01[1, ] - apply(st_boot_tmp_01, 2, mean)

    cat(paste("Fitting model iteration", k, sep = " "), " \r")

    st_est_adj_01 <- rbind(st_est_adj_01,
                           st_est_boot_adj_01)

  }

  rownames(st_est_adj_01) <- NULL
  colnames(st_est_adj_01) <- c("mu", "lsigma_e", "ldelta", "lsigma_r", "lsigma_mu", "convergence")

  return(as_tibble(st_est_adj_01))

}

# spatial correlation, species heterogeneity and different means ####

f_est_sad_mz_cs_dm <- function(data, n_boot = 20, n_rep = 1, n_sp_mult = 1){

  data <- data %>%
    filter(abundance > 0)

  # estimate the temporal correlation
  st_est_glmmTMB_01 <- glmmTMB(abundance ~ exp(pos_m + 0 | species:year_m) +
                                 (1 | species:year_m) +
                                 (1 | year_m:pos_m),
                               data = data,
                               family = truncated_poisson(link = "log"))

  st_est_adj_01 <- c()
  st_est_adj_01 <- rbind(st_est_adj_01,
                         c(st_est_glmmTMB_01$fit$par,
                           st_est_glmmTMB_01$fit$convergence))

  for (k in 1:n_boot){

    st_boot_tmp_01 <- c()

    input_01 <- list(n_sp = n_distinct(data$species) * n_sp_mult, # number of species
                     n_year =  n_distinct(data$year_m), # number of time points
                     n_loc = n_distinct(data$pos_m), # number of locations
                     n_obs = n_distinct(data$observer), # number of locations
                     mu = st_est_adj_01[k, 1], # mean log abundance
                     mu_sigma = exp(st_est_adj_01[k, 5])^2, # mean log abundance
                     sigma_r_sq = exp(st_est_adj_01[k, 4])^2, # species heterogeneity
                     sigma_s_sq = exp(st_est_adj_01[k, 2])^2, # environmental variance
                     sigma_d_sq = 0, # sampling variance
                     delta = 0, # "strength of density regulation"
                     eta = 1 / exp(st_est_adj_01[k, 3]),
                     pos_x = unique(parseNumLevels(data$pos_m))[, 1],
                     pos_y = unique(parseNumLevels(data$pos_m))[, 2]) # 1 / "spatial scaling of noise"

    for (i in 1:n_rep){

      data_boot_01 <- f_sim_sad_fast(input_01, space = "fixed", dependency = "space", var_mu = TRUE)

      # estimate the temporal correlation
      st_est_boot_01 <- glmmTMB(abundance ~ exp(pos_m + 0 | species:year_m) +
                                  (1 | species:year_m) +
                                  (1 | year_m:pos_m),
                                data = filter(data_boot_01, abundance > 0),
                                family = truncated_poisson(link = "log"))


      st_boot_tmp_01 <- rbind(st_boot_tmp_01,
                              c(st_est_boot_01$fit$par, st_est_boot_01$fit$convergence))

    }

    st_est_boot_adj_01 <- st_est_adj_01[k, ] + st_est_adj_01[1, ] - apply(st_boot_tmp_01, 2, mean)

    cat(paste("Fitting model iteration", k, sep = " "), " \r")

    st_est_adj_01 <- rbind(st_est_adj_01,
                           st_est_boot_adj_01)

  }

  rownames(st_est_adj_01) <- NULL
  colnames(st_est_adj_01) <- c("mu", "lsigma_e", "letainv", "lsigma_r", "lsigma_mu","convergence")

  return(as_tibble(st_est_adj_01))

}

# Bats data ####

# temporal correlation, heterogeneity, different means, over-dispersion ####

f_est_sad_mt_cs_v2_dm <- function(data, n_boot = 20, n_rep = 1, n_sp_mult = 3){

  data <- data %>%
    filter(abundance > 0)

  # estimate the temporal correlation
  st_est_glmmTMB_01 <- glmmTMB(abundance ~ ou(year_m + 0 | species:pos_m) +
                                 (1 | species:pos_m) +
                                 (1 | species:pos_m:year_m:observer_m) +
                                 (1 | year_m:pos_m),
                               data = data,
                               family = truncated_poisson(link = "log"))

  st_est_adj_01 <- c()
  st_est_adj_01 <- rbind(st_est_adj_01,
                         c(st_est_glmmTMB_01$fit$par,
                           st_est_glmmTMB_01$fit$convergence))

  for (k in 1:n_boot){

    st_boot_tmp_01 <- c()

    input_01 <- list(n_sp = n_distinct(data$species) * n_sp_mult, # number of species
                     n_year =  unique(parseNumLevels(data$year_m))[order(unique(parseNumLevels(data$year_m)))], # number of time points
                     n_loc = n_distinct(data$pos_m), # number of locations
                     # n_obs = n_distinct(data$observer_m), # number of locations
                     n_obs = 2, # number of locations
                     mu = st_est_adj_01[k, 1], # mean log abundance
                     mu_sigma = exp(st_est_adj_01[k, 6])^2, # mean log abundance
                     sigma_r_sq = exp(st_est_adj_01[k, 4])^2, # species heterogeneity
                     sigma_s_sq = exp(st_est_adj_01[k, 2])^2, # environmental variance
                     sigma_d_sq = exp(st_est_adj_01[k, 5])^2, # sampling variance
                     delta = exp(st_est_adj_01[k, 3]), # "strength of density regulation"
                     eta = 0,
                     pos_x = NULL,
                     pos_y = NULL) # 1 / "spatial scaling of noise"

    for (i in 1:n_rep){

      data_boot_01 <- f_sim_sad_fast(input_01, time = "fixed", dependency = "time", var_mu = TRUE)

      # estimate the temporal correlation
      st_est_boot_01 <- glmmTMB(abundance ~ ou(year_m + 0 | species:pos_m) +
                                  (1 | species:pos_m) +
                                  (1 | species:pos_m:year_m:observer) +
                                  (1 | year_m:pos_m),
                                data = filter(data_boot_01, abundance > 0),
                                family = truncated_poisson(link = "log"))


      st_boot_tmp_01 <- rbind(st_boot_tmp_01,
                              c(st_est_boot_01$fit$par, st_est_boot_01$fit$convergence))

    }

    st_est_boot_adj_01 <- st_est_adj_01[k, ] + st_est_adj_01[1, ] - apply(st_boot_tmp_01, 2, mean)

    cat(paste("Fitting model iteration", k, sep = " "), " \r")

    st_est_adj_01 <- rbind(st_est_adj_01,
                           st_est_boot_adj_01)

  }

  rownames(st_est_adj_01) <- NULL
  colnames(st_est_adj_01) <- c("mu", "lsigma_e", "ldelta", "lsigma_r", "lsigma_d", "lsigma_mu", "convergence")

  return(as_tibble(st_est_adj_01))

}

# spatial correlation, heterogeneity, different means, over-dispersion ####

f_est_sad_mz_cs_v2_dm <- function(data, n_boot = 30, n_rep = 3, n_sp_mult = 1){

  data <- data %>%
    filter(abundance > 0)

  # estimate the temporal correlation
  st_est_glmmTMB_01 <- glmmTMB(abundance ~ exp(pos_m + 0 | species:year_m) +
                                 (1 | species:year_m) +
                                 (1 | species:year_m:pos_m:observer_m) +
                                 (1 | year_m:pos_m),
                               data = data,
                               family = truncated_poisson(link = "log"))

  st_est_adj_01 <- c()
  st_est_adj_01 <- rbind(st_est_adj_01,
                         c(st_est_glmmTMB_01$fit$par,
                           st_est_glmmTMB_01$fit$convergence))

  for (k in 1:n_boot){

    st_boot_tmp_01 <- c()

    input_01 <- list(n_sp = n_distinct(data$species) * n_sp_mult, # number of species
                     n_year =  n_distinct(data$year_m), # number of time points
                     n_loc = n_distinct(data$pos_m), # number of locations
                     # n_obs = n_distinct(data$observer_m), # number of locations
                     n_obs = 2, # number of locations
                     mu = st_est_adj_01[k, 1], # mean log abundance
                     mu_sigma = exp(st_est_adj_01[k, 6])^2, # mean log abundance
                     sigma_r_sq = exp(st_est_adj_01[k, 4])^2, # species heterogeneity
                     sigma_s_sq = exp(st_est_adj_01[k, 2])^2, # environmental variance
                     sigma_d_sq = exp(st_est_adj_01[k, 5])^2, # sampling variance
                     delta = 0, # "strength of density regulation"
                     eta = 1 / exp(st_est_adj_01[k, 3]),
                     pos_x = unique(parseNumLevels(data$pos_m))[, 1],
                     pos_y = unique(parseNumLevels(data$pos_m))[, 2]) # 1 / "spatial scaling of noise"

    for (i in 1:n_rep){

      data_boot_01 <- f_sim_sad_fast(input_01, space = "fixed", dependency = "space", var_mu = TRUE)

      # estimate the temporal correlation
      st_est_boot_01 <- glmmTMB(abundance ~ exp(pos_m + 0 | species:year_m) +
                                  (1 | species:year_m) +
                                  (1 | species:year_m:pos_m:observer) +
                                  (1 | year_m:pos_m),
                                data = filter(data_boot_01, abundance > 0),
                                family = truncated_poisson(link = "log"))


      st_boot_tmp_01 <- rbind(st_boot_tmp_01,
                              c(st_est_boot_01$fit$par, st_est_boot_01$fit$convergence))

    }

    st_est_boot_adj_01 <- st_est_adj_01[k, ] + st_est_adj_01[1, ] - apply(st_boot_tmp_01, 2, mean)

    cat(paste("Fitting model iteration", k, sep = " "), " \r")

    st_est_adj_01 <- rbind(st_est_adj_01,
                           st_est_boot_adj_01)

  }

  rownames(st_est_adj_01) <- NULL
  colnames(st_est_adj_01) <- c("mu", "lsigma_e", "letainv", "lsigma_r", "lsigma_d", "lsigma_mu", "convergence")

  return(as_tibble(st_est_adj_01))

}