R/example_bats_data_complete.R

# Case study: bat data from BioTIME (Dornelas et al. 2018)

# Require:
# source(file = "R/required_packages.R")
# source(file = "R/f_sim_sad.R")
# source(file = "R/f_glmm_est_cs.R")

# Data preparation ####

# The data can be downloaded from biotime.st-andrews.ac.uk as:

# Study ID 516: Mammals
# A large-scale fragmentation experiment for Neotropical bats

# Load data

bats_data <- read_csv("rawdata_bats_brazil.csv")

# Transform to utm coordinates for spatial distance calculation
latlong_bats <- distinct(dplyr::select(bats_data, LATITUDE, LONGITUDE))

st_bats <- st_sfc(st_multipoint(cbind(latlong_bats$LONGITUDE, latlong_bats$LATITUDE)))

st_crs(st_bats) <- "+proj=longlat +datum=WGS84"

st_bats <- st_transform(st_bats, "+proj=utm +zone=21 ellps=WGS84")

utm_bats <- st_coordinates(st_bats)

# Convert distance to kilometres
ll_utm_bats <- as_tibble(cbind(latlong_bats, utm_bats[, -3]/1000))

bats_data <- left_join(bats_data, ll_utm_bats)

# Create spatial and temporal variables

bats_data <- bats_data %>%
  mutate(date = ymd(paste(YEAR, MONTH, DAY)),
         time_diff = as.numeric(date - min(date)),
         # time = numFactor(time_diff),
         location = numFactor(X, Y),
         # time_y = numFactor(YEAR),
         time_m = numFactor(round(time_diff / 30)))

bats_data <- bats_data %>%
  mutate(description = str_split(SAMPLE_DESC, "_"))

habitat <- bats_data %>% pull(description) %>% flatten_chr()

bats_data <- bats_data %>%
  mutate(habitat = habitat[seq(3,15172, by = 11)]) %>%
  dplyr::select(-description)

rm(habitat)

# If the number of distinct space, time and species combinations
# is equal to the total number of observations, there are no
# repeated measurements and we cannot estimate over-dispersion

# Distinct combinations:

bats_data %>%
  dplyr::select(time_m, GENUS_SPECIES, location) %>%
  n_distinct()


bats_data %>%
  group_by(time_m, location) %>%
  summarise(med_ab = median(ABUNDANCE),
            n_sp = n_distinct(GENUS_SPECIES)) %>%
  ungroup() %>%
  summarise(avg_med_ab = mean(med_ab),
            avg_n_sp = mean(n_sp))

# Number of observations:

nrow(bats_data)

# Add "observer" variable to each combination

bats_data <- bats_data %>%
  group_by(time_m, GENUS_SPECIES, location) %>%
  mutate(observer_m = 1:n()) %>%
  # mutate(observer_m = 1:n())
  ungroup()

# bats_data_sum <- bats_data %>%
#   group_by(time_m, location) %>%
#   summarise(max_obs_m = max(n_distinct(observer_m)))


# Figures to illustrate the raw data (for supplementary material)

inter_bats <- bats_data %>%
  mutate(location = rank(location)) %>%
  dplyr::select(time_m, location) %>%
  arrange(location, time_m) %>%
  distinct() %>%
  mutate(inter = interaction(time_m, location)) %>%
  group_by(location) %>%
  mutate(min_int = inter[1],
         max_int = inter[length(inter)]) %>%
  ungroup() %>%
  dplyr::select(min_int, max_int) %>%
  distinct()

bats_data %>%
  mutate(location = rank(location)) %>%
  ggplot(aes(x = interaction(time_m, location), y = interaction(observer_m, GENUS_SPECIES))) +
  geom_raster(aes(fill = log(ABUNDANCE)), hjust = 1) +
  geom_vline(data = inter_bats, aes(xintercept = min_int), colour = "red") +
  scale_fill_viridis_c("log ab.") +
  labs(x = "All time points at a given location",
       y = "Observer and species") +
  theme(axis.text.x = element_text(hjust = 1, angle = 45))

ggsave("bats_logab_temporal_model.pdf", width = 9, height = 6)

inter_bats <- bats_data %>%
  mutate(location = rank(location)) %>%
  dplyr::select(time_m, location) %>%
  arrange(location, time_m) %>%
  distinct() %>%
  mutate(inter = interaction(location, time_m)) %>%
  group_by(time_m) %>%
  mutate(min_int = inter[1],
         max_int = inter[length(inter)]) %>%
  ungroup() %>%
  dplyr::select(min_int, max_int) %>%
  distinct()

bats_data %>%
  mutate(location = rank(location)) %>%
  ggplot(aes(x = interaction(location, time_m), y = interaction(observer_m, GENUS_SPECIES))) +
  geom_raster(aes(fill = log(ABUNDANCE)), hjust = 1) +
  geom_vline(data = inter_bats, aes(xintercept = min_int), colour = "red") +
  scale_fill_viridis_c("log ab.") +
  labs(x = "All locations at a given time point",
       y = "Observer and species") +
  theme(axis.text.x = element_text(hjust = 1, angle = 45))

ggsave("bats_logab_spatial_model.pdf", width = 9, height = 6)

bats_data %>%
  mutate(location = rank(location)) %>%
  ggplot(aes(x = interaction(time_m, location), y = interaction(observer_m, GENUS_SPECIES))) +
  geom_raster(aes(fill = log(ABUNDANCE))) +
  geom_vline(aes(xintercept = which(interaction(time_m, location) %in% inter_bats$min_int)), colour = "red") +
  scale_fill_viridis_c() +
  theme(axis.text.x = element_text(hjust = 1, angle = 45))

interaction(bats_data$time_m, bats_data$location)[which(interaction(bats_data$time_m, bats_data$location) %in% inter_bats$min_int)]


# Renaming to match the names used in simulations of the bias correction

bats_data <- bats_data %>%
  rename(abundance = ABUNDANCE,
         year_m = time_m,
         pos_m = location,
         species = GENUS_SPECIES)

# Estimation ####

# bats_mt_dm_long <- list()
# bats_mz_dm_long <- list()
# for (i in 1:100){
#
#   bats_mt_dm_long[[i]] <- try(f_est_sad_mt_cs_v2_dm(data = bats_data,
#                                                     n_boot = 30,
#                                                     n_rep = 1,
#                                                     n_sp_mult = 1))
#
#   bats_mz_dm_long[[i]] <- try(f_est_sad_mz_cs_v2_dm(data = bats_data,
#                                                     n_boot = 30,
#                                                     n_rep = 1,
#                                                     n_sp_mult = 1))
# }

# bats_point_est_wide <- bats_point_est %>%
#   pivot_wider(values_from = avg_est)
#
# bats_boot_mt_input <- list(n_sp = n_distinct(bats_data$species), # number of species
#                            n_year =  unique(parseNumLevels(bats_data$year_m))[order(unique(parseNumLevels(bats_data$year_m)))], # number of time points
#                            n_loc = n_distinct(bats_data$pos_m), # number of locations
#                            n_obs = 2, # number of locations
#                            mu = bats_point_est_wide$mu[2], # mean log abundance
#                            mu_sigma = bats_point_est_wide$sigma_mu_sq_p[2], # mean log abundance
#                            sigma_r_sq = bats_point_est_wide$sigma_r_sq_p[2], # species heterogeneity
#                            sigma_s_sq = bats_point_est_wide$sigma_e_sq_p[2], # environmental variance
#                            sigma_d_sq = bats_point_est_wide$sigma_d_sq_p[2], # sampling variance
#                            delta = bats_point_est_wide$delta[2], # "strength of density regulation"
#                            eta = 0,
#                            pos_x = NULL,
#                            pos_y = NULL) # 1 / "spatial scaling of noise"
#
# bats_boot_mz_input <- list(n_sp = n_distinct(bats_data$species), # number of species
#                            n_year =   n_distinct(bats_data$year_m), # number of time points
#                            n_loc = n_distinct(bats_data$pos_m), # number of locations
#                            n_obs = 2, # number of locations
#                            mu = bats_point_est_wide$mu[1], # mean log abundance
#                            mu_sigma = bats_point_est_wide$sigma_mu_sq_p[1], # mean log abundance
#                            sigma_r_sq = bats_point_est_wide$sigma_r_sq_p[1], # species heterogeneity
#                            sigma_s_sq = bats_point_est_wide$sigma_e_sq_p[1], # environmental variance
#                            sigma_d_sq = bats_point_est_wide$sigma_d_sq_p[2], # sampling variance
#                            delta = 0, # "strength of density regulation"
#                            eta = bats_point_est_wide$eta[1],
#                            pos_x = unique(parseNumLevels(bats_data$pos_m))[, 1],
#                            pos_y = unique(parseNumLevels(bats_data$pos_m))[, 2]) # 1 / "spatial scaling of noise"
#
#
# bats_mt_boot_dm_long <- list()
# bats_mz_boot_dm_long <- list()
# for (i in 1:100){
#
#   bats_data_mt_boot <- f_sim_sad_fast(input = bats_boot_mt_input, time = "fixed", dependency = "time", var_mu = TRUE)
#   bats_data_mz_boot <- f_sim_sad_fast(input = bats_boot_mz_input, space = "fixed", dependency = "space", var_mu = TRUE)
#
#   bats_mt_boot_dm_long[[i]] <- try(f_est_sad_mt_cs_v2_dm(data = rename(bats_data_mt_boot,
#                                                                        observer_m = observer),
#                                                          n_boot = 30,
#                                                          n_rep = 1,
#                                                          n_sp_mult = 1))
#
#   bats_mz_boot_dm_long[[i]] <- try(f_est_sad_mz_cs_v2_dm(data = rename(bats_data_mz_boot,
#                                                                        observer_m = observer),
#                                                          n_boot = 30,
#                                                          n_rep = 1,
#                                                          n_sp_mult = 1))
# }

# bats_mt_dm_long_part01 <- read_rds("bats_mt_dm_long_part01.rds")
# bats_mz_dm_long_part01 <- read_rds("bats_mz_dm_long_part01.rds")
# bats_mt_dm_long_part02 <- read_rds("bats_mt_dm_long_part02.rds")
# bats_mz_dm_long_part02 <- read_rds("bats_mz_dm_long_part02.rds")
# bats_mt_dm_long_part03 <- read_rds("bats_mt_dm_long_part03.rds")
# bats_mz_dm_long_part03 <- read_rds("bats_mz_dm_long_part03.rds")

bats_mt_dm_long_part01 <- read_rds("bats_mt_boot_dm_long_part01.rds")
bats_mz_dm_long_part01 <- read_rds("bats_mz_boot_dm_long_part01.rds")
bats_mt_dm_long_part02 <- read_rds("bats_mt_boot_dm_long_part02.rds")
bats_mz_dm_long_part02 <- read_rds("bats_mz_boot_dm_long_part02.rds")
bats_mt_dm_long_part03 <- read_rds("bats_mt_boot_dm_long_part03.rds")
bats_mz_dm_long_part03 <- read_rds("bats_mz_boot_dm_long_part03.rds")

bats_mt_dm_long_res <- c()
bats_mz_dm_long_res <- c()
for (i in 1:length(bats_mt_dm_long_part01)){
  if (is_tibble(bats_mt_dm_long_part01[[i]])){
    bats_mt_dm_long_res <- bind_rows(bats_mt_dm_long_res,
                                  add_column(bats_mt_dm_long_part01[[i]],
                                             replicate = i,
                                             iteration = 0:(nrow(bats_mt_dm_long_part01[[i]]) - 1)))
  }
  if (is_tibble(bats_mz_dm_long_part01[[i]])){
    bats_mz_dm_long_res <- bind_rows(bats_mz_dm_long_res,
                                  add_column(bats_mz_dm_long_part01[[i]],
                                             replicate = i,
                                             iteration = 0:(nrow(bats_mz_dm_long_part01[[i]]) - 1)))
  }
}
for (i in 1:length(bats_mt_dm_long_part02)){
  if (is_tibble(bats_mt_dm_long_part02[[i]])){
    bats_mt_dm_long_res <- bind_rows(bats_mt_dm_long_res,
                                  add_column(bats_mt_dm_long_part02[[i]],
                                             replicate = i + length(bats_mt_dm_long_part01),
                                             iteration = 0:(nrow(bats_mt_dm_long_part02[[i]]) - 1)))
  }
  if (is_tibble(bats_mz_dm_long_part02[[i]])){
    bats_mz_dm_long_res <- bind_rows(bats_mz_dm_long_res,
                                  add_column(bats_mz_dm_long_part02[[i]],
                                             replicate = i + length(bats_mt_dm_long_part01),
                                             iteration = 0:(nrow(bats_mz_dm_long_part02[[i]]) - 1)))
  }
}
for (i in 1:length(bats_mt_dm_long_part03)){
  if (is_tibble(bats_mt_dm_long_part03[[i]])){
    bats_mt_dm_long_res <- bind_rows(bats_mt_dm_long_res,
                                  add_column(bats_mt_dm_long_part03[[i]],
                                             replicate = i + length(bats_mt_dm_long_part01) + length(bats_mt_dm_long_part02),
                                             iteration = 0:(nrow(bats_mt_dm_long_part03[[i]]) - 1)))
  }
  if (is_tibble(bats_mz_dm_long_part03[[i]])){
    bats_mz_dm_long_res <- bind_rows(bats_mz_dm_long_res,
                                  add_column(bats_mz_dm_long_part03[[i]],
                                             replicate = i + length(bats_mt_dm_long_part01) + length(bats_mt_dm_long_part02),
                                             iteration = 0:(nrow(bats_mz_dm_long_part03[[i]]) - 1)))
  }
}

# Convergence figure ####
# bind_rows(add_column(bats_mt_dm_long_res, dimension = "Temporal"),
#                            add_column(bats_mz_dm_long_res, dimension = "Spatial")) %>%
#   pivot_longer(cols = c(mu:lsigma_mu, letainv)) %>%
#   ggplot(aes(x = iteration, y = value)) +
#   geom_line(aes(group = interaction(dimension, replicate)), alpha = 0.5) +
#   facet_grid(name ~ dimension, scales = "free_y") +
#   theme_bw()
#
# ggsave("appendix_convergence_bats_data.pdf", width = 6, height = 9)

bats_res_long <- bind_rows(add_column(bats_mt_dm_long_res, dimension = "Temporal"),
                           add_column(bats_mz_dm_long_res, dimension = "Spatial")) %>%
  mutate(sigma_e_sq_p = exp(lsigma_e)^2,
         "delta" = ifelse(is.na(ldelta), NA, exp(ldelta)),
         "eta" = ifelse(is.na(letainv), NA, 1 / exp(letainv)),
         sigma_r_sq_p = ifelse(is.na(lsigma_r), NA, exp(lsigma_r)^2),
         sigma_d_sq_p = ifelse(is.na(lsigma_d), NA, exp(lsigma_d)^2),
         sigma_mu_sq_p = ifelse(is.na(lsigma_mu), NA, exp(lsigma_mu)^2),
         "sigma[m]^2" = ifelse(is.na(lsigma_mu), NA, exp(lsigma_mu)^2),
         "sigma[d]^2" = ifelse(is.na(lsigma_d), NA, exp(lsigma_d)^2),
         "sigma[e]^2" = ifelse(is.na(delta), NA, sigma_e_sq_p * (2 * delta)),
         "sigma[r]^2" = ifelse(is.na(delta), NA, sigma_r_sq_p * (delta ^ 2)),
         ret_time = 1 / delta,
         spat_scal = 1 / eta) %>%
  pivot_longer(cols = c(mu:lsigma_d, lsigma_mu, letainv, sigma_e_sq_p:spat_scal))

bats_est_sum <- bats_res_long %>%
  group_by(dimension, name, iteration) %>%
  summarise(avg_est = mean(value, na.rm = TRUE),
            sd_err = sd(value, na.rm = TRUE),
            med_est = median(value, na.rm = TRUE),
            q05_est = quantile(value, probs = 0.05, na.rm = TRUE),
            q25_est = quantile(value, probs = 0.25, na.rm = TRUE),
            q75_est = quantile(value, probs = 0.75, na.rm = TRUE),
            q95_est = quantile(value, probs = 0.95, na.rm = TRUE))

# Point estimates ####

bats_point_est <- bats_est_sum %>%
  filter(iteration == 30) %>%
  dplyr::select(dimension, name, avg_est)

# write_csv(filter(bats_est_sum, iteration == 30), "bats_est.csv")
write_csv(filter(bats_est_sum, iteration == 30), "bats_boot_est.csv")

# Histogram of parameter estimates based on full model ####
# accounting for variation in mean values between space-time points

bats_res_long %>%
  filter(iteration == 30) %>%
  # filter(name %in% c("delta", "eta", "sigma[m]^2", "sigma[e]^2", "sigma[r]^2", "sigma[d]^2", "mu")) %>%
  filter(name %in% c("ldelta", "letainv", "lsigma_mu", "lsigma_e", "lsigma_r", "lsigma_d", "mu")) %>%
  ggplot(aes(x = value)) +
  geom_histogram(bins = 10, colour = "black", fill = "light grey") +
  geom_vline(data = filter(bats_point_est,
                           name %in% c("ldelta",
                                       "letainv",
                                       "lsigma_mu",
                                       "lsigma_e",
                                       "lsigma_r",
                                       "lsigma_d",
                                       "mu")),
             aes(xintercept = avg_est)) +
  # geom_vline(data = filter(bats_point_est,
  #                          name %in% c("delta",
  #                                      "eta",
  #                                      "sigma[m]^2",
  #                                      "sigma[e]^2",
  #                                      "sigma[r]^2",
  #                                      "sigma[d]^2",
  #                                      "mu")),
  #            aes(xintercept = avg_est)) +
  facet_grid(dimension ~ name, scales = "free") +
  theme_bw()

ggsave("bats_data_parameter_uncertainty.pdf", width = 8, height = 5)

# Temporal correlation ####

bats_point_est_t <- bats_point_est %>%
  filter(dimension == "Temporal") %>%
  pivot_wider(values_from = "avg_est")

bats_mt_corr <- tibble(time = 0:max(dist(parseNumLevels(bats_data$year_m))),
                       rho = (bats_point_est_t$sigma_e_sq_p *
                                exp(-bats_point_est_t$delta * time) +
                                bats_point_est_t$sigma_r_sq_p) /
                         (bats_point_est_t$sigma_e_sq_p +
                            bats_point_est_t$sigma_r_sq_p +
                            bats_point_est_t$sigma_mu_sq_p +
                            bats_point_est_t$sigma_d_sq_p))

bats_mt_corr_res <- bind_rows(add_column(bats_mt_corr, dimension = "Temporal"))

bats_mt_boot <- bats_res_long %>%
  filter(iteration == 30 &
           dimension == "Temporal") %>%
  pivot_wider()

tmp <- c()
for (i in 1:nrow(bats_mt_boot)){
  for (time in c(0, seq(10, 190, by = 10), max(dist(parseNumLevels(bats_data$year_m))))){
    rho <- (exp(bats_mt_boot$lsigma_e[i])^2 *
              exp(-exp(bats_mt_boot$ldelta[i]) * time) +
              exp(bats_mt_boot$lsigma_r[i])^2) /
      (exp(bats_mt_boot$lsigma_e[i])^2 +
         exp(bats_mt_boot$lsigma_r[i])^2 +
         exp(bats_mt_boot$lsigma_mu[i])^2 +
         exp(bats_mt_boot$lsigma_d[i])^2)
    tmp <- rbind(tmp,
                 c(i, time, rho))
  }
}

corr_mt_tbl <- tibble(replicate = tmp[, 1],
                      time = tmp[, 2],
                      rho = tmp[, 3])

corr_mt_q_tbl <- corr_mt_tbl %>%
  group_by(time) %>%
  summarise(q025 = quantile(rho, probs = 0.025),
            q05 = quantile(rho, probs = 0.05),
            q25 = quantile(rho, probs = 0.25),
            q5 = quantile(rho, probs = 0.5),
            q75 = quantile(rho, probs = 0.75),
            q95 = quantile(rho, probs = 0.95),
            q975 = quantile(rho, probs = 0.975))

bats_mt_corr_obs <- tibble(time = unique(dist(parseNumLevels(bats_data$year_m))),
                           rho = (bats_point_est_t$sigma_e_sq_p *
                                    exp(-bats_point_est_t$delta * time) +
                                    bats_point_est_t$sigma_r_sq_p) /
                             (bats_point_est_t$sigma_e_sq_p +
                                bats_point_est_t$sigma_r_sq_p +
                                bats_point_est_t$sigma_mu_sq_p +
                                bats_point_est_t$sigma_d_sq_p))

bats_corr_plot_a <- bats_mt_corr_res %>%
  ggplot(aes(x = time, y = rho)) +
  geom_ribbon(data = corr_mt_q_tbl, aes(ymin = q05, y = q5, ymax = q95), alpha = 0.2) +
  geom_ribbon(data = corr_mt_q_tbl, aes(ymin = q25, y = q5, ymax = q75), alpha = 0.2) +
  geom_line() +
  geom_point(data = bats_mt_corr_obs,
             fill = "light grey", shape = 21, size = 2) +
  scale_x_continuous(breaks = c(0, seq(25, 190, by = 25))) +
  lims(y = c(0, 1)) +
  labs(x = "Time (months)", y = "Correlation") +
  theme_bw()

# Spatial correlation ####

bats_point_est_z <- bats_point_est %>%
  filter(dimension == "Spatial") %>%
  pivot_wider(values_from = "avg_est")

bats_mz_corr <- tibble(distance = 0:max(dist(parseNumLevels(bats_data$pos_m))),
                       rho = (bats_point_est_z$sigma_e_sq_p *
                                exp(-bats_point_est_z$eta * distance) +
                                bats_point_est_z$sigma_r_sq_p) /
                         (bats_point_est_z$sigma_e_sq_p +
                            bats_point_est_z$sigma_r_sq_p +
                            bats_point_est_z$sigma_mu_sq_p +
                            bats_point_est_z$sigma_d_sq_p))

bats_mz_corr_res <- bind_rows(add_column(bats_mz_corr, dimension = "Spatial"))

bats_mz_boot <- bats_res_long %>%
  filter(iteration == 30 &
           dimension == "Spatial") %>%
  pivot_wider()

tmp <- c()
for (i in 1:nrow(bats_mz_boot)){
  for (distance in c(0, seq(1, 42, by = 1), max(dist(parseNumLevels(bats_data$pos_m))))){
    rho <- (exp(bats_mz_boot$lsigma_e[i])^2 *
              exp(- distance / exp(bats_mz_boot$letainv[i])) +
              exp(bats_mz_boot$lsigma_r[i])^2) /
      (exp(bats_mz_boot$lsigma_e[i])^2 +
         exp(bats_mz_boot$lsigma_r[i])^2 +
         exp(bats_mz_boot$lsigma_mu[i])^2 +
         exp(bats_mz_boot$lsigma_d[i])^2)
    tmp <- rbind(tmp,
                 c(i, distance, rho))
  }
}

corr_mz_tbl <- tibble(replicate = tmp[, 1],
                      distance = tmp[, 2],
                      rho = tmp[, 3])

corr_mz_q_tbl <- corr_mz_tbl %>%
  group_by(distance) %>%
  summarise(q025 = quantile(rho, probs = 0.025),
            q05 = quantile(rho, probs = 0.05),
            q25 = quantile(rho, probs = 0.25),
            q5 = quantile(rho, probs = 0.5),
            q75 = quantile(rho, probs = 0.75),
            q95 = quantile(rho, probs = 0.95),
            q975 = quantile(rho, probs = 0.975))


bats_mz_corr_obs <- tibble(distance = unique(dist(parseNumLevels(bats_data$pos_m))),
                           rho = (bats_point_est_z$sigma_e_sq_p *
                                    exp(-bats_point_est_z$eta * distance) +
                                    bats_point_est_z$sigma_r_sq_p) /
                             (bats_point_est_z$sigma_e_sq_p +
                                bats_point_est_z$sigma_r_sq_p +
                                bats_point_est_z$sigma_mu_sq_p +
                                bats_point_est_z$sigma_d_sq_p))

bats_corr_plot_c <- bats_mz_corr_res %>%
  ggplot(aes(x = distance, y = rho)) +
  geom_ribbon(data = corr_mz_q_tbl, aes(ymin = q05, y = q5, ymax = q95), alpha = 0.2) +
  geom_ribbon(data = corr_mz_q_tbl, aes(ymin = q25, y = q5, ymax = q75), alpha = 0.2) +
  geom_line() +
  geom_point(data = bats_mz_corr_obs,
             fill = "light grey", shape = 21, size = 2) +
  scale_x_continuous(breaks = c(0, seq(5, 35, by = 5))) +
  lims(y = c(0, 1)) +
  labs(x = "Distance (km)", y = "Correlation") +
  theme_bw()

# Total variance ####

tot_var_est <- bats_res_long %>%
  filter(iteration == 30) %>%
  pivot_wider() %>%
  group_by(dimension) %>%
  summarise(avg_est = mean(sigma_e_sq_p + sigma_r_sq_p + sigma_mu_sq_p + sigma_d_sq_p),
            sd_err = mean(sigma_e_sq_p + sigma_r_sq_p + sigma_mu_sq_p + sigma_d_sq_p),
            q05 = quantile(sigma_e_sq_p + sigma_r_sq_p + sigma_mu_sq_p + sigma_d_sq_p, probs = 0.05),
            q25 = quantile(sigma_e_sq_p + sigma_r_sq_p + sigma_mu_sq_p + sigma_d_sq_p, probs = 0.25),
            q75 = quantile(sigma_e_sq_p + sigma_r_sq_p + sigma_mu_sq_p + sigma_d_sq_p, probs = 0.75),
            q95 = quantile(sigma_e_sq_p + sigma_r_sq_p + sigma_mu_sq_p + sigma_d_sq_p, probs = 0.95))

tot_var_est_quant <- tot_var_est %>%
  pivot_longer(q05:q95) %>%
  add_column(confidence = rep(c("90 percent", "50 percent", "50 percent", "90 percent"), 2)) %>%
  pivot_wider()

bats_corr_plot_b <- tot_var_est %>%
  ggplot(aes(x = dimension, y = avg_est)) +
  geom_col(colour = "black", fill = "light grey") +
  geom_errorbar(data = tot_var_est_quant,
                aes(x = dimension, ymin = q25, ymax = q75,
                    linetype = confidence),
                # position = position_dodge(width = 0.9),
                width = 0.25) +
  geom_errorbar(data = tot_var_est_quant,
                aes(x = dimension, ymin = q05, ymax = q95,
                    linetype = confidence),
                # position = position_dodge(width = 0.9),
                width = 0.25) +
  labs(x = "Dimension", y = "Total variance") +
  guides(linetype = guide_legend("Confidence")) +
  theme_bw()


# Variance partitioning ####

prop_var_est <- bats_res_long %>%
  filter(iteration == 30) %>%
  pivot_wider() %>%
  group_by(dimension) %>%
  summarise(prop_env = mean(sigma_e_sq_p / (sigma_e_sq_p + sigma_r_sq_p + sigma_mu_sq_p + sigma_d_sq_p)),
            prop_het = mean(sigma_r_sq_p / (sigma_e_sq_p + sigma_r_sq_p + sigma_mu_sq_p + sigma_d_sq_p)),
            prop_err = mean(sigma_mu_sq_p / (sigma_e_sq_p + sigma_r_sq_p + sigma_mu_sq_p + sigma_d_sq_p)),
            prop_disp = mean(sigma_d_sq_p / (sigma_e_sq_p + sigma_r_sq_p + sigma_mu_sq_p + sigma_d_sq_p)))

write_csv(prop_var_est, "bats_prop_var_est.csv")

bats_corr_plot_d <- prop_var_est %>%
  pivot_longer(cols = prop_env:prop_disp) %>%
  ggplot(aes(x = dimension, y = value)) +
  geom_col(colour = "black", aes(fill = name)) +
  scale_fill_manual(values = c("prop_env" = "dark grey",
                               "prop_het" = "white",
                               "prop_err" = "light grey",
                               "prop_disp" = "grey"),
                    labels = c("Over-dispersion", "Environmental", "Common noise", "Heterogeneity")) +
  labs(x = "Dimension", y = "Proportion") +
  guides(fill = guide_legend("Source")) +
  theme_bw()


library(cowplot)

plot_grid(bats_corr_plot_a,
          bats_corr_plot_b,
          bats_corr_plot_c,
          bats_corr_plot_d, labels = c("A", "B", "C", "D"), rel_widths = c(1.5, 1))

ggsave("bats_corr_plot.pdf", width = 8, height = 5)
erikbsolbu/dynamicSAD documentation built on Jan. 25, 2021, 5 a.m.