dynamicSAD:

# Different versions of the zero-adjusted algorithm

# Used in the simulation example of different assumptions ####

# f_est_sad_mt: temporal correlation, species heterogeneity, over-dispersion
# f_est_sad_mt_nh: temporal correlation, NO heterogeneity, over-dispersion
# f_est_sad_mt_nc: NO correlation, species heterogeneity, over-dispersion

# Data is assumed to be a tibble (or data.frame) with the following columns:
# species: identity of species (numer or name)
# abundance: abundance (integer) of the species
# year_m: numFactor() of the temporal component, e.g. year
# pos_m: numFactor() of the spatial component, i.e. x and y positions
# observer: a variable that makes each observation unique, e.g. row number,
#           could also be unique for the specific species-time-location combo

# In addition to 'data' the functions takes the following input:
# n_sp: the number of species to simulate each iteration
# n_boot: the number of iterations in each run of the algorithm
# n_rep: if you want to repeat each iteration step multiple times
#        for the same run of the algorithm and take the mean of those values

# Temporal correlation and species heterogeneity ####

f_est_sad_mt <- function(data, n_sp = 1, n_boot = 10, n_rep = 1){

  # the truncated_poisson model takes only positive values
  data <- data %>%
    filter(abundance > 0)

  st_est_glmmTMB_01 <- glmmTMB(
    # abundance is described by
    abundance ~
      # temporal correlation: exponential decay between zero and one,
      # i.e. an Ornstein-Uhlenbeck process
      ou(year_m + 0 | species:pos_m) +
      # species heterogeneity
      (1 | species:pos_m) +
      # over-dispersion (sampling error)
      (1 | species:year_m:pos_m:observer),
    data = data,
    family = truncated_poisson(link = "log"))

  # Store the parameter estimates and convergence information
  st_est_adj_01 <- c()
  st_est_adj_01 <- rbind(st_est_adj_01,
                         c(st_est_glmmTMB_01$fit$par,
                           st_est_glmmTMB_01$fit$convergence))

  # k: the number of iterations
  for (k in 1:n_boot){

    # storage for intermediate estimates of simulations
    st_boot_tmp_01 <- c()

    # input: parameter values for the simulation
    input <- list(
      # number of species
      n_sp = n_sp,
      # number of time points
      n_year =  unique(data$year)[order(unique(data$year))],
      # number of locations
      n_loc = n_distinct(data$pos_m),
      # number of locations
      n_obs = n_distinct(data$observer),
      # mean log abundance
      mu = st_est_adj_01[k, 1],
      # variance of mean log abundance
      mu_sigma = 0,
      # species heterogeneity
      sigma_r_sq = exp(st_est_adj_01[k, 4])^2,
      # environmental variance
      sigma_s_sq = exp(st_est_adj_01[k, 2])^2,
      # sampling variance
      sigma_d_sq = exp(st_est_adj_01[k, 5])^2,
      # strength of density regulation
      delta = exp(st_est_adj_01[k, 3]),
      # inverse spatial scaling
      eta = 0,
      # x coordinates
      pos_x = NULL,
      # y coordinates
      pos_y = NULL)

    # i: each iteration step can be repeated multiple times time get an average
    # value of the parameter estimates at the current iteration
    for (i in 1:n_rep){

      data_boot_01 <- f_sim_sad_fast(input,
                                     # we use the specific time points of the
                                     # data set
                                     time = "fixed",
                                     # we are only interested in time, so we
                                     # simulate with independent locations
                                     dependency = "time")

      # estimate the temporal correlation
      st_est_boot_01 <- glmmTMB(abundance ~ ou(year_m + 0 | species:pos_m) +
                                  (1 | species:pos_m) +
                                  (1 | species:year_m:pos_m:observer),
                                data = filter(data_boot_01, abundance > 0),
                                family = truncated_poisson(link = "log"))

      # store all intermediate estimates
      st_boot_tmp_01 <- rbind(st_boot_tmp_01,
                              c(st_est_boot_01$fit$par,
                                st_est_boot_01$fit$convergence))

    }

    # adjust the estimate by the (average of the) current iteration
    st_est_boot_adj_01 <- st_est_adj_01[k, ] + st_est_adj_01[1, ] -
      apply(st_boot_tmp_01, 2, mean)

    cat(paste("Fitting model, iteration step", k, sep = " "), " \r")

    # add the new estimate to the matrix of iteration steps
    st_est_adj_01 <- rbind(st_est_adj_01,
                           st_est_boot_adj_01)

  }

  # naming the parameter estimates
  rownames(st_est_adj_01) <- NULL
  colnames(st_est_adj_01) <- c("mu",
                               "lsigma_e",
                               "ldelta",
                               "lsigma_r",
                               "lsigma_d",
                               "convergence")

  return(as_tibble(st_est_adj_01))

}

# No heterogeneity ####

# See the function f_est_sad_mt above for explanation of the code

f_est_sad_mt_nh <- function(data, n_sp = 1, n_boot = 10, n_rep = 1){

  data <- data %>%
    filter(abundance > 0)

  st_est_glmmTMB_01 <- glmmTMB(
    abundance ~ ou(year_m + 0 | species:pos_m) +
      (1 | species:year_m:pos_m:observer),
    data = data,
    family = truncated_poisson(link = "log"))

  st_est_adj_01 <- c()
  st_est_adj_01 <- rbind(st_est_adj_01,
                         c(st_est_glmmTMB_01$fit$par,
                           st_est_glmmTMB_01$fit$convergence))

  for (k in 1:n_boot){

    st_boot_tmp_01 <- c()

    input <- list(n_sp = n_sp,
                  n_year =  unique(data$year)[order(unique(data$year))],
                  n_loc = n_distinct(data$pos_m),
                  n_obs = n_distinct(data$observer),
                  mu = st_est_adj_01[k, 1],
                  mu_sigma = 0,
                  sigma_r_sq = 0,
                  sigma_s_sq = exp(st_est_adj_01[k, 2])^2,
                  sigma_d_sq = exp(st_est_adj_01[k, 4])^2,
                  delta = exp(st_est_adj_01[k, 3]),
                  eta = 0,
                  pos_x = NULL,
                  pos_y = NULL)

    for (i in 1:n_rep){

      data_boot_01 <- f_sim_sad_fast(input,
                                     time = "fixed",
                                     dependency = "time")

      st_est_boot_01 <- glmmTMB(abundance ~ ou(year_m + 0 | species:pos_m) +
                                  (1 | species:year_m:pos_m:observer),
                                data = filter(data_boot_01, abundance > 0),
                                family = truncated_poisson(link = "log"))


      st_boot_tmp_01 <- rbind(st_boot_tmp_01,
                              c(st_est_boot_01$fit$par,
                                st_est_boot_01$fit$convergence))

    }

    st_est_boot_adj_01 <- st_est_adj_01[k, ] + st_est_adj_01[1, ] -
      apply(st_boot_tmp_01, 2, mean)

    cat(paste("Fitting model, iteration step", k, sep = " "), " \r")

    st_est_adj_01 <- rbind(st_est_adj_01,
                           st_est_boot_adj_01)

  }

  rownames(st_est_adj_01) <- NULL
  colnames(st_est_adj_01) <- c("mu",
                               "lsigma_e",
                               "ldelta",
                               "lsigma_d",
                               "convergence")

  return(as_tibble(st_est_adj_01))

}

# No temporal correlation ####

# See the function f_est_sad_mt above for explanation of the code

f_est_sad_mt_nc <- function(data, n_sp =1, n_boot = 10, n_rep = 1){

  data <- data %>%
    filter(abundance > 0)

  st_est_glmmTMB_01 <- glmmTMB(
    abundance ~ (1 | species:year_m:pos_m) +
      (1 | species:pos_m) +
      (1 | species:year_m:pos_m:observer),
    data = data,
    family = truncated_poisson(link = "log"))

  st_est_adj_01 <- c()
  st_est_adj_01 <- rbind(st_est_adj_01,
                         c(st_est_glmmTMB_01$fit$par,
                           st_est_glmmTMB_01$fit$convergence))

  for (k in 1:n_boot){

    st_boot_tmp_01 <- c()

    input <- list(n_sp = n_sp,
                  n_year =  unique(data$year)[order(unique(data$year))],
                  n_loc = n_distinct(data$pos_m),
                  n_obs = n_distinct(data$observer),
                  mu = st_est_adj_01[k, 1],
                  mu_sigma = 0,
                  sigma_r_sq = exp(st_est_adj_01[k, 3])^2,
                  sigma_s_sq = exp(st_est_adj_01[k, 2])^2,
                  sigma_d_sq = exp(st_est_adj_01[k, 4])^2,
                  # delta = 1000, short-cut to turn correlation to zero
                  delta = 1000,
                  eta = 0,
                  pos_x = NULL,
                  pos_y = NULL)

    for (i in 1:n_rep){

      data_boot_01 <- f_sim_sad_fast(input,
                                     time = "fixed",
                                     dependency = "time")

      st_est_boot_01 <- glmmTMB(abundance ~ (1 | species:year_m:pos_m) +
                                  (1 | species:pos_m) +
                                  (1 | species:year_m:pos_m:observer),
                                data = filter(data_boot_01, abundance > 0),
                                family = truncated_poisson(link = "log"))

      st_boot_tmp_01 <- rbind(st_boot_tmp_01,
                              c(st_est_boot_01$fit$par,
                                st_est_boot_01$fit$convergence))

    }

    st_est_boot_adj_01 <- st_est_adj_01[k, ] + st_est_adj_01[1, ] -
      apply(st_boot_tmp_01, 2, mean)

    cat(paste("Fitting model, iteration step", k, sep = " "), " \r")

    st_est_adj_01 <- rbind(st_est_adj_01,
                           st_est_boot_adj_01)

  }

  rownames(st_est_adj_01) <- NULL
  colnames(st_est_adj_01) <- c("mu",
                               "lsigma_e",
                               "lsigma_r",
                               "lsigma_d",
                               "convergence")

  return(as_tibble(st_est_adj_01))

}

# Spatial correlation and species heterogeneity ####

f_est_sad_mz <- function(data, n_boot = 20, n_rep = 1, n_sp = 1){

  data <- data %>%
    filter(abundance > 0)

  # estimate the temporal correlation
  st_est_glmmTMB_01 <- glmmTMB(abundance ~ exp(pos_m + 0 | species:year_m) +
                                 (1 | species:year_m) +
                                 (1 | species:year_m:pos_m:observer),
                               data = data,
                               family = truncated_poisson(link = "log"))

  st_est_adj_01 <- c()
  st_est_adj_01 <- rbind(st_est_adj_01,
                         c(st_est_glmmTMB_01$fit$par,
                           st_est_glmmTMB_01$fit$convergence))

  for (k in 1:n_boot){

    st_boot_tmp_01 <- c()

    input <- list(n_sp = n_distinct(data$species) * n_sp, # number of species
                     n_year =  n_distinct(data$year), # number of time points
                     n_loc = n_distinct(data$pos_m), # number of locations
                     n_obs = n_distinct(data$observer), # number of locations
                     mu = st_est_adj_01[k, 1], # mean log abundance
                     sigma_r_sq = exp(st_est_adj_01[k, 4])^2, # species heterogeneity
                     sigma_s_sq = exp(st_est_adj_01[k, 2])^2, # environmental variance
                     sigma_d_sq = exp(st_est_adj_01[k, 5])^2, # sampling variance
                     delta = 0, # "strength of density regulation"
                     eta = 1 / exp(st_est_adj_01[k, 3]),
                     pos_x = unique(data$pos_x)[order(unique(data$pos_x))],
                     pos_y = unique(data$pos_y)[order(unique(data$pos_y))]) # 1 / "spatial scaling of noise"

    for (i in 1:n_rep){

      data_boot_01 <- f_sim_sad_fast(input, space = "fixed", dependency = "space")

      # estimate the temporal correlation
      st_est_boot_01 <- glmmTMB(abundance ~ exp(pos_m + 0 | species:year_m) +
                                  (1 | species:year_m) +
                                  (1 | species:year_m:pos_m:observer),
                                data = filter(data_boot_01, abundance > 0),
                                family = truncated_poisson(link = "log"))


      st_boot_tmp_01 <- rbind(st_boot_tmp_01,
                              c(st_est_boot_01$fit$par, st_est_boot_01$fit$convergence))

    }

    st_est_boot_adj_01 <- st_est_adj_01[k, ] + st_est_adj_01[1, ] - apply(st_boot_tmp_01, 2, mean)

    cat(paste("Fitting model", case, "iteration", it, "of 100, boot correction ", k, sep = " "), " \r")

    st_est_adj_01 <- rbind(st_est_adj_01,
                           st_est_boot_adj_01)

  }

  rownames(st_est_adj_01) <- NULL
  colnames(st_est_adj_01) <- c("mu", "lsigma_e", "letainv", "lsigma_r", "lsigma_d", "convergence")

  return(as_tibble(st_est_adj_01))

}