dynamicSAD:

# Case study: fish data from BioTIME (Dornelas et al. 2018)

# Require:
# source(file = "R/required_packages.R")
# source(file = "R/f_sim_sad.R")
# source(file = "R/f_glmm_est_cs.R")

# Data preparation ####

# The data can be downloaded from biotime.st-andrews.ac.uk as:

# Study ID 467: Fish
# Spatial and temporal organisation of a coastal lagoon fish community

# Load data
fish_data <- read_csv("rawdata_fish_portugal.csv")

# Transform to utm coordinates for spatial distance calculation
latlong_fish <- distinct(dplyr::select(fish_data, LATITUDE, LONGITUDE))

st_fish <- st_sfc(st_multipoint(cbind(latlong_fish$LONGITUDE,
                                      latlong_fish$LATITUDE)))

st_crs(st_fish) <- "+proj=longlat +datum=WGS84"

st_fish <- st_transform(st_fish, "+proj=utm +zone=29 ellps=WGS84")

utm_fish <- st_coordinates(st_fish)

# Convert distance to kilometres
ll_utm_fish <- as_tibble(cbind(latlong_fish, utm_fish[, -3]/1000))

fish_data <- left_join(fish_data, ll_utm_fish)

# Create spatial and temporal variables

fish_data <- fish_data %>%
  mutate(date = ymd(paste(YEAR, MONTH, DAY)),
         time_diff = as.numeric(date - min(date)),
         location = numFactor(X, Y),
         time_m = numFactor(round(time_diff / 30)))

# Figures to illustrate the raw data (for supplementary material)

# to create the vertical red lines in the raster figure
inter_fish <- fish_data %>%
  mutate(location = rank(location)) %>%
  dplyr::select(time_m, location) %>%
  arrange(location, time_m) %>%
  distinct() %>%
  mutate(inter = interaction(time_m, location)) %>%
  group_by(location) %>%
  mutate(min_int = inter[1],
         max_int = inter[length(inter)]) %>%
  ungroup() %>%
  dplyr::select(min_int, max_int) %>%
  distinct()

fish_data %>%
  mutate(location = rank(location)) %>%
  ggplot(aes(x = interaction(time_m, location),
             y = interaction(GENUS_SPECIES))) +
  geom_raster(aes(fill = log(ABUNDANCE)), hjust = 1) +
  geom_vline(data = inter_fish, aes(xintercept = min_int), colour = "red") +
  scale_fill_viridis_c("log ab.") +
  labs(x = "All time points at a given location",
       y = "Species") +
  theme(axis.text.x = element_text(hjust = 1, angle = 45))

# ggsave("fish_logab_temporal_model.pdf", width = 6, height = 9)

# again, to create vertical red lines in raster
inter_fish <- fish_data %>%
  mutate(location = rank(location)) %>%
  dplyr::select(time_m, location) %>%
  arrange(location, time_m) %>%
  distinct() %>%
  mutate(inter = interaction(location, time_m)) %>%
  group_by(time_m) %>%
  mutate(min_int = inter[1],
         max_int = inter[length(inter)]) %>%
  ungroup() %>%
  dplyr::select(min_int, max_int) %>%
  distinct()

fish_data %>%
  mutate(location = rank(location)) %>%
  ggplot(aes(x = interaction(location, time_m),
             y = interaction(GENUS_SPECIES))) +
  geom_raster(aes(fill = log(ABUNDANCE)), hjust = 1) +
  geom_vline(data = inter_fish, aes(xintercept = min_int), colour = "red") +
  scale_fill_viridis_c("log ab.") +
  labs(x = "All locations at a given time point",
       y = "Species") +
  theme(axis.text.x = element_text(hjust = 1, angle = 45))

# ggsave("fish_logab_spatial_model.pdf", width = 6, height = 9)

# average, median abundance and number of species at given time-location point
fish_data %>%
  group_by(time_m, location) %>%
  summarise(med_ab = median(ABUNDANCE),
            n_sp = n_distinct(GENUS_SPECIES)) %>%
  ungroup() %>%
  summarise(avg_med_ab = mean(med_ab),
            avg_n_sp = mean(n_sp))

# Renaming to match the names used in simulations of the bias correction

fish_data <- fish_data %>%
  rename(abundance = ABUNDANCE,
         year_m = time_m,
         pos_m = location,
         species = GENUS_SPECIES) %>%
  add_column(observer = 1)

# Estimation ####

# we do 100 replicates and use these to compute point estimates

# Temporal and spatial correlation by month, different means

# fish_mt_dm_long <- list()
# fish_mz_dm_long <- list()
# for (i in 1:100){
#
#   fish_mt_dm_long[[i]] <- try(f_est_sad_mt_cs_dm(data = fish_data,
#                                                  n_boot = 30,
#                                                  n_rep = 1,
#                                                  n_sp_mult = 1))
#
#   fish_mz_dm_long[[i]] <- try(f_est_sad_mz_cs_dm(data = fish_data,
#                                                  n_boot = 30,
#                                                  n_rep = 1,
#                                                  n_sp_mult = 1))
# }

# Save estimates
# write_rds(fish_mt_dm_long, "fish_mt_dm_long.rds")
# write_rds(fish_mz_dm_long, "fish_mz_dm_long.rds")

# Load estimates
fish_mt_dm_long <- read_rds("fish_mt_dm_long.rds")
fish_mz_dm_long <- read_rds("fish_mz_dm_long.rds")

# Write estimates to a table
fish_mt_dm_long_res <- c()
fish_mz_dm_long_res <- c()
for (i in 1:100){
  if (is_tibble(fish_mt_dm_long[[i]])){
    fish_mt_dm_long_res <- bind_rows(fish_mt_dm_long_res,
                                     add_column(fish_mt_dm_long[[i]],
                                                replicate = i,
                                                iteration = 0:(nrow(fish_mt_dm_long[[i]]) - 1)))
  }
  if (is_tibble(fish_mz_dm_long[[i]])){
    fish_mz_dm_long_res <- bind_rows(fish_mz_dm_long_res,
                                     add_column(fish_mz_dm_long[[i]],
                                                replicate = i,
                                                iteration = 0:(nrow(fish_mz_dm_long[[i]]) - 1)))
  }
}

# Inspect convergence with figure ####
bind_rows(add_column(fish_mt_dm_long_res, dimension = "Temporal"),
                           add_column(fish_mz_dm_long_res, dimension = "Spatial")) %>%
  pivot_longer(cols = c(mu:lsigma_mu, letainv)) %>%
  ggplot(aes(x = iteration, y = value)) +
  geom_line(aes(group = interaction(dimension, replicate)), alpha = 0.5) +
  facet_grid(name ~ dimension, scales = "free_y") +
  theme_bw()

# ggsave("sm_convergence_fish_data.pdf", width = 6, height = 8)

# Compute different point estimates as functions of initial estimates ####
fish_res_long <- bind_rows(add_column(fish_mt_dm_long_res, dimension = "Temporal"),
                           add_column(fish_mz_dm_long_res, dimension = "Spatial")) %>%
  mutate(sigma_e_sq_p = exp(lsigma_e)^2,
         "delta" = ifelse(is.na(ldelta), NA, exp(ldelta)),
         "eta" = ifelse(is.na(letainv), NA, 1 / exp(letainv)),
         sigma_r_sq_p = ifelse(is.na(lsigma_r), NA, exp(lsigma_r)^2),
         sigma_mu_sq_p = ifelse(is.na(lsigma_mu), NA, exp(lsigma_mu)^2),
         "sigma[m]^2" = ifelse(is.na(lsigma_mu), NA, exp(lsigma_mu)^2),
         "sigma[e]^2" = ifelse(is.na(delta), NA, sigma_e_sq_p * (2 * delta)),
         "sigma[r]^2" = ifelse(is.na(delta), NA, sigma_r_sq_p * (delta ^ 2)),
         ret_time = 1 / delta,
         spat_scal = 1 / eta) %>%
  pivot_longer(cols = c(mu:lsigma_r, lsigma_mu, letainv, sigma_e_sq_p:spat_scal))

fish_est_sum <- fish_res_long %>%
  # # remove two outliers
  # filter(!(replicate == 83 & dimension == "Spatial")) %>%
  # filter(!(replicate == 32 & dimension == "Temporal")) %>%
  group_by(dimension, name, iteration) %>%
  # compute mean, error, median, and different quantiles
  summarise(avg_est = mean(value, na.rm = TRUE),
            sd_err = sd(value, na.rm = TRUE),
            med_est = median(value, na.rm = TRUE),
            q05_est = quantile(value, probs = 0.05, na.rm = TRUE),
            q25_est = quantile(value, probs = 0.25, na.rm = TRUE),
            q75_est = quantile(value, probs = 0.75, na.rm = TRUE),
            q95_est = quantile(value, probs = 0.95, na.rm = TRUE))

# Store results
# write_csv(filter(fish_est_sum, iteration == 30), "fish_est.csv")

# Point estimates ####

fish_point_est <- fish_est_sum %>%
  filter(iteration == 30) %>%
  dplyr::select(dimension, name, avg_est)

# Bootstrap estimates ####

# Use estimates as input values
# fish_point_est_wide <- fish_point_est %>%
#   pivot_wider(values_from = avg_est)
#
# fish_boot_mt_input <- list(n_sp = n_distinct(fish_data$species), # number of species
#                            n_year =  unique(parseNumLevels(fish_data$year_m))[order(unique(parseNumLevels(fish_data$year_m)))], # number of time points
#                            n_loc = n_distinct(fish_data$pos_m), # number of locations
#                            n_obs = n_distinct(fish_data$observer), # number of locations
#                            mu = fish_point_est_wide$mu[2], # mean log abundance
#                            mu_sigma = fish_point_est_wide$sigma_mu_sq_p[2], # mean log abundance
#                            sigma_r_sq = fish_point_est_wide$sigma_r_sq_p[2], # species heterogeneity
#                            sigma_s_sq = fish_point_est_wide$sigma_e_sq_p[2], # environmental variance
#                            sigma_d_sq = 0, # sampling variance
#                            delta = fish_point_est_wide$delta[2], # "strength of density regulation"
#                            eta = 0,
#                            pos_x = NULL,
#                            pos_y = NULL) # 1 / "spatial scaling of noise"
#
# fish_boot_mz_input <- list(n_sp = n_distinct(fish_data$species), # number of species
#                            n_year =   n_distinct(fish_data$year_m), # number of time points
#                            n_loc = n_distinct(fish_data$pos_m), # number of locations
#                            n_obs = n_distinct(fish_data$observer), # number of locations
#                            mu = fish_point_est_wide$mu[1], # mean log abundance
#                            mu_sigma = fish_point_est_wide$sigma_mu_sq_p[1], # mean log abundance
#                            sigma_r_sq = fish_point_est_wide$sigma_r_sq_p[1], # species heterogeneity
#                            sigma_s_sq = fish_point_est_wide$sigma_e_sq_p[1], # environmental variance
#                            sigma_d_sq = 0, # sampling variance
#                            delta = 0, # "strength of density regulation"
#                            eta = fish_point_est_wide$eta[1],
#                            pos_x = unique(parseNumLevels(fish_data$pos_m))[, 1],
#                            pos_y = unique(parseNumLevels(fish_data$pos_m))[, 2]) # 1 / "spatial scaling of noise"
#
## simulate and estimate parameters again
# fish_mt_boot_dm_long <- list()
# fish_mz_boot_dm_long <- list()
# for (i in 1:100){
#
#   fish_data_mt_boot <- f_sim_sad_fast(input = fish_boot_mt_input, time = "fixed", dependency = "time", var_mu = TRUE)
#   fish_data_mz_boot <- f_sim_sad_fast(input = fish_boot_mz_input, space = "fixed", dependency = "space", var_mu = TRUE)
#
#   fish_mt_boot_dm_long[[i]] <- try(f_est_sad_mt_cs_dm(data = fish_data_mt_boot,
#                                                       n_boot = 30,
#                                                       n_rep = 1,
#                                                       n_sp_mult = 1))
#
#   fish_mz_boot_dm_long[[i]] <- try(f_est_sad_mz_cs_dm(data = fish_data_mz_boot,
#                                                       n_boot = 30,
#                                                       n_rep = 1,
#                                                       n_sp_mult = 1))
# }

# Save estimates
# write_rds(fish_mt_boot_dm_long, "fish_mt_boot_dm_long.rds")
# write_rds(fish_mz_boot_dm_long, "fish_mz_boot_dm_long.rds")

# Load estimates
fish_mt_boot_dm_long <- read_rds("fish_mt_boot_dm_long.rds")
fish_mz_boot_dm_long <- read_rds("fish_mz_boot_dm_long.rds")

# Write estimates to a table
fish_mt_boot_dm_long_res <- c()
fish_mz_boot_dm_long_res <- c()
for (i in 1:100){
  if (is_tibble(fish_mt_boot_dm_long[[i]])){
    fish_mt_boot_dm_long_res <- bind_rows(fish_mt_boot_dm_long_res,
                                     add_column(fish_mt_boot_dm_long[[i]],
                                                replicate = i,
                                                iteration = 0:(nrow(fish_mt_boot_dm_long[[i]]) - 1)))
  }
  if (is_tibble(fish_mz_boot_dm_long[[i]])){
    fish_mz_boot_dm_long_res <- bind_rows(fish_mz_boot_dm_long_res,
                                     add_column(fish_mz_boot_dm_long[[i]],
                                                replicate = i,
                                                iteration = 0:(nrow(fish_mz_boot_dm_long[[i]]) - 1)))
  }
}

fish_boot_res_long <- bind_rows(add_column(fish_mt_boot_dm_long_res, dimension = "Temporal"),
                           add_column(fish_mz_boot_dm_long_res, dimension = "Spatial")) %>%
  mutate(sigma_e_sq_p = exp(lsigma_e)^2,
         "delta" = ifelse(is.na(ldelta), NA, exp(ldelta)),
         "eta" = ifelse(is.na(letainv), NA, 1 / exp(letainv)),
         sigma_r_sq_p = ifelse(is.na(lsigma_r), NA, exp(lsigma_r)^2),
         sigma_mu_sq_p = ifelse(is.na(lsigma_mu), NA, exp(lsigma_mu)^2),
         "sigma[m]^2" = ifelse(is.na(lsigma_mu), NA, exp(lsigma_mu)^2),
         "sigma[e]^2" = ifelse(is.na(delta), NA, sigma_e_sq_p * (2 * delta)),
         "sigma[r]^2" = ifelse(is.na(delta), NA, sigma_r_sq_p * (delta ^ 2)),
         ret_time = 1 / delta,
         spat_scal = 1 / eta) %>%
  pivot_longer(cols = c(mu:lsigma_r, lsigma_mu, letainv, sigma_e_sq_p:spat_scal))

fish_boot_est_sum <- fish_boot_res_long %>%
  # remove two outliers
  filter(!(replicate == 83 & dimension == "Spatial")) %>%
  filter(!(replicate == 32 & dimension == "Temporal")) %>%
  group_by(dimension, name, iteration) %>%
  # compute mean, error, median, and different quantiles
  summarise(avg_est = mean(value, na.rm = TRUE),
            sd_err = sd(value, na.rm = TRUE),
            med_est = median(value, na.rm = TRUE),
            q05_est = quantile(value, probs = 0.05, na.rm = TRUE),
            q25_est = quantile(value, probs = 0.25, na.rm = TRUE),
            q75_est = quantile(value, probs = 0.75, na.rm = TRUE),
            q95_est = quantile(value, probs = 0.95, na.rm = TRUE))

# Store results
# write_csv(filter(fish_boot_est_sum, iteration == 30), "fish_boot_est.csv")

# Histogram of parameter estimates based on full model ####
# accounting for variation in mean values between space-time points

fish_boot_res_long %>%
  filter(iteration == 30) %>%
  filter(!(replicate == 83 & dimension == "Spatial")) %>%
  filter(!(replicate == 32 & dimension == "Temporal")) %>%
  filter(name %in% c("ldelta", "letainv", "lsigma_mu", "lsigma_e", "lsigma_r", "mu")) %>%
  ggplot(aes(x = value)) +
  geom_histogram(bins = 10, colour = "black", fill = "light grey") +
  geom_vline(data = filter(fish_point_est,
                           name %in% c("ldelta",
                                       "letainv",
                                       "lsigma_mu",
                                       "lsigma_e",
                                       "lsigma_r",
                                       "mu")),
             aes(xintercept = avg_est)) +
  facet_grid(dimension ~ name, scales = "free") +
  theme_bw()

# ggsave("fish_data_parameter_uncertainty.pdf", width = 8, height = 5)

# Temporal correlation ####

# Point estimates based on the original data
fish_point_est_t <- fish_point_est %>%
  filter(dimension == "Temporal") %>%
  pivot_wider(values_from = "avg_est")

# Compute rho(t, 0)
fish_mt_corr <- tibble(time = 0:max(dist(parseNumLevels(fish_data$year_m))),
                       rho = (fish_point_est_t$sigma_e_sq_p *
                                exp(-fish_point_est_t$delta * time) +
                                fish_point_est_t$sigma_r_sq_p) /
                         (fish_point_est_t$sigma_e_sq_p +
                            fish_point_est_t$sigma_r_sq_p +
                            fish_point_est_t$sigma_mu_sq_p))

fish_mt_corr_res <- fish_mt_corr %>%
  add_column(dimension = "Temporal")

# Bootstrap estimates
fish_mt_boot <- fish_boot_res_long %>%
  # Remove two outliers
  filter(!(replicate == 83 & dimension == "Spatial")) %>%
  filter(!(replicate == 32 & dimension == "Temporal")) %>%
  filter(iteration == 30 &
           dimension == "Temporal") %>%
  pivot_wider()

# Compute correlation for each bootstrap replicate, to be used for the
# calculation of confidence intervals
tmp <- c()
for (i in 1:nrow(fish_mt_boot)){
  for (time in c(0, seq(10, 140, by = 10),
                 max(dist(parseNumLevels(fish_data$year_m))))){
    rho <- (exp(fish_mt_boot$lsigma_e[i])^2 *
              exp(-exp(fish_mt_boot$ldelta[i]) * time) +
              exp(fish_mt_boot$lsigma_r[i])^2) /
      (exp(fish_mt_boot$lsigma_e[i])^2 +
         exp(fish_mt_boot$lsigma_r[i])^2 +
         exp(fish_mt_boot$lsigma_mu[i])^2)
    tmp <- rbind(tmp,
                 c(i, time, rho))
  }
}

# Make a table of the correlations
corr_mt_tbl <- tibble(replicate = tmp[, 1],
                      time = tmp[, 2],
                      rho = tmp[, 3])

# Compute quantiles
corr_mt_q_tbl <- corr_mt_tbl %>%
  group_by(time) %>%
  summarise(q025 = quantile(rho, probs = 0.025),
            q05 = quantile(rho, probs = 0.05),
            q25 = quantile(rho, probs = 0.25),
            q5 = quantile(rho, probs = 0.5),
            q75 = quantile(rho, probs = 0.75),
            q95 = quantile(rho, probs = 0.95),
            q975 = quantile(rho, probs = 0.975))

# Compute correlation at observed time points
fish_mt_corr_obs <- tibble(time = unique(dist(parseNumLevels(fish_data$year_m))),
                       rho = (fish_point_est_t$sigma_e_sq_p *
                                exp(-fish_point_est_t$delta * time) +
                                fish_point_est_t$sigma_r_sq_p) /
                         (fish_point_est_t$sigma_e_sq_p +
                            fish_point_est_t$sigma_r_sq_p +
                            fish_point_est_t$sigma_mu_sq_p))

# Make a correlation plot
fish_corr_plot_a <- fish_mt_corr_res %>%
  ggplot(aes(x = time, y = rho)) +
  geom_ribbon(data = corr_mt_q_tbl, aes(ymin = q05, y = q5, ymax = q95), alpha = 0.2) +
  geom_ribbon(data = corr_mt_q_tbl, aes(ymin = q25, y = q5, ymax = q75), alpha = 0.2) +
  geom_line() +
  geom_point(data = fish_mt_corr_obs,
             fill = "light grey", shape = 21, size = 2) +
  scale_x_continuous(breaks = c(0, seq(25, 125, by = 25))) +
  lims(y = c(0, 1)) +
  labs(x = "Time (months)", y = "Correlation") +
  theme_bw()

# Spatial correlation ####

fish_point_est_z <- fish_point_est %>%
  filter(dimension == "Spatial") %>%
  pivot_wider(values_from = "avg_est")

fish_mz_corr <- tibble(distance = 0:max(dist(parseNumLevels(fish_data$pos_m))),
                       rho = (fish_point_est_z$sigma_e_sq_p *
                                exp(-fish_point_est_z$eta * distance) +
                                fish_point_est_z$sigma_r_sq_p) /
                         (fish_point_est_z$sigma_e_sq_p +
                            fish_point_est_z$sigma_r_sq_p +
                            fish_point_est_z$sigma_mu_sq_p))

fish_mz_corr_res <- bind_rows(add_column(fish_mz_corr, dimension = "Spatial"))

fish_mz_boot <- fish_res_long %>%
  filter(!(replicate == 83 & dimension == "Spatial")) %>%
  filter(!(replicate == 32 & dimension == "Temporal")) %>%
  filter(iteration == 30 &
           dimension == "Spatial") %>%
  pivot_wider()

tmp <- c()
for (i in 1:nrow(fish_mz_boot)){
  for (distance in c(0, seq(1, 35, by = 1), max(dist(parseNumLevels(fish_data$pos_m))))){
    rho <- (exp(fish_mz_boot$lsigma_e[i])^2 *
              exp(- distance / exp(fish_mz_boot$letainv[i])) +
              exp(fish_mz_boot$lsigma_r[i])^2) /
      (exp(fish_mz_boot$lsigma_e[i])^2 +
         exp(fish_mz_boot$lsigma_r[i])^2 +
         exp(fish_mz_boot$lsigma_mu[i])^2)
    tmp <- rbind(tmp,
                 c(i, distance, rho))
  }
}

corr_mz_tbl <- tibble(replicate = tmp[, 1],
                      distance = tmp[, 2],
                      rho = tmp[, 3])

corr_mz_q_tbl <- corr_mz_tbl %>%
  group_by(distance) %>%
  summarise(q025 = quantile(rho, probs = 0.025),
            q05 = quantile(rho, probs = 0.05),
            q25 = quantile(rho, probs = 0.25),
            q5 = quantile(rho, probs = 0.5),
            q75 = quantile(rho, probs = 0.75),
            q95 = quantile(rho, probs = 0.95),
            q975 = quantile(rho, probs = 0.975))


fish_mz_corr_obs <- tibble(distance = unique(dist(parseNumLevels(fish_data$pos_m))),
                           rho = (fish_point_est_z$sigma_e_sq_p *
                                    exp(-fish_point_est_z$eta * distance) +
                                    fish_point_est_z$sigma_r_sq_p) /
                             (fish_point_est_z$sigma_e_sq_p +
                                fish_point_est_z$sigma_r_sq_p +
                                fish_point_est_z$sigma_mu_sq_p))

fish_corr_plot_c <- fish_mz_corr_res %>%
  ggplot(aes(x = distance, y = rho)) +
  geom_ribbon(data = corr_mz_q_tbl, aes(ymin = q05, y = q5, ymax = q95), alpha = 0.2) +
  geom_ribbon(data = corr_mz_q_tbl, aes(ymin = q25, y = q5, ymax = q75), alpha = 0.2) +
  geom_line() +
  geom_point(data = fish_mz_corr_obs,
             fill = "light grey", shape = 21, size = 2) +
  scale_x_continuous(breaks = c(0, seq(5, 35, by = 5))) +
  lims(y = c(0, 1)) +
  labs(x = "Distance (km)", y = "Correlation") +
  theme_bw()

# Total variance ####

tot_var_est <- fish_res_long %>%
  filter(replicate != 83) %>%
  filter(replicate != 32) %>%
  filter(iteration == 30) %>%
  pivot_wider() %>%
  group_by(dimension) %>%
  summarise(avg_est = mean(sigma_e_sq_p + sigma_r_sq_p + sigma_mu_sq_p),
            sd_err = mean(sigma_e_sq_p + sigma_r_sq_p + sigma_mu_sq_p),
            q05 = quantile(sigma_e_sq_p + sigma_r_sq_p + sigma_mu_sq_p, probs = 0.05),
            q25 = quantile(sigma_e_sq_p + sigma_r_sq_p + sigma_mu_sq_p, probs = 0.25),
            q75 = quantile(sigma_e_sq_p + sigma_r_sq_p + sigma_mu_sq_p, probs = 0.75),
            q95 = quantile(sigma_e_sq_p + sigma_r_sq_p + sigma_mu_sq_p, probs = 0.95))

tot_var_est_quant <- tot_var_est %>%
  pivot_longer(q05:q95) %>%
  add_column(confidence = rep(c("90 percent", "50 percent", "50 percent", "90 percent"), 2)) %>%
  pivot_wider()

fish_corr_plot_b <- tot_var_est %>%
  ggplot(aes(x = dimension, y = avg_est)) +
  geom_col(colour = "black", fill = "light grey") +
  geom_errorbar(data = tot_var_est_quant,
                aes(x = dimension, ymin = q25, ymax = q75,
                    linetype = confidence),
                # position = position_dodge(width = 0.9),
                width = 0.25) +
  geom_errorbar(data = tot_var_est_quant,
                aes(x = dimension, ymin = q05, ymax = q95,
                    linetype = confidence),
                # position = position_dodge(width = 0.9),
                width = 0.25) +
  labs(x = "Dimension", y = "Total variance") +
  guides(linetype = guide_legend("Confidence")) +
  theme_bw()


# Variance partitioning ####

prop_var_est <- fish_res_long %>%
  filter(replicate != 83) %>%
  filter(replicate != 32) %>%
  filter(iteration == 30) %>%
  pivot_wider() %>%
  group_by(dimension) %>%
  summarise(prop_env = mean(sigma_e_sq_p / (sigma_e_sq_p + sigma_r_sq_p + sigma_mu_sq_p)),
            prop_het = mean(sigma_r_sq_p / (sigma_e_sq_p + sigma_r_sq_p + sigma_mu_sq_p)),
            prop_err = mean(sigma_mu_sq_p / (sigma_e_sq_p + sigma_r_sq_p + sigma_mu_sq_p)))

write_csv(prop_var_est, "fish_prop_var_est.csv")

fish_corr_plot_d <- prop_var_est %>%
  pivot_longer(cols = prop_env:prop_err) %>%
  ggplot(aes(x = dimension, y = value)) +
  geom_col(colour = "black", aes(fill = name)) +
  scale_fill_manual(values = c("prop_env" = "dark grey",
                               "prop_het" = "white",
                               "prop_err" = "light grey"),
                    labels = c("Environmental", "Common noise", "Heterogeneity")) +
  labs(x = "Dimension", y = "Proportion") +
  guides(fill = guide_legend("Source")) +
  theme_bw()


library(cowplot)

plot_grid(fish_corr_plot_a,
          fish_corr_plot_b,
          fish_corr_plot_c,
          fish_corr_plot_d, labels = c("A", "B", "C", "D"), rel_widths = c(1.5, 1))

# ggsave("fish_corr_plot.pdf", width = 8, height = 5)