example_analyses/noro_rota_berlin.R

##### This file serves to reproduce parts of the analysis from
##### Bracher/Held (2018+): Multivariate endemic-epidemic models
##### with higher-order lags and an application to outbreak detection

# install the package hhh4addon:
# library(devtools)
# install_github("jbracher/hhh4addon", build_vignettes = TRUE)

#########################################
# get and format data:
library(surveillance)
library(hhh4addon)
library(RColorBrewer)

setwd("/home/johannes/Documents/hhh4predict/Theory/Article_Theory/data_analysis")
# The file auxiliary functions.R needs to be available (equally available in example_analyses.R)
source("auxiliary_functions.R")

library(hhh4contacts)
data("noroBE")
data("rotaBE")

# modify neighbourhood matrices to apply power law:
noroBE@neighbourhood <- noroBE@neighbourhood + 1
rotaBE@neighbourhood <- rotaBE@neighbourhood + 1

data <- list(noro = noroBE, rota = rotaBE)
names_diseases <- names(data)

library(RColorBrewer)
cols_model_versions <- brewer.pal(8, "Dark2")[1:4]
cols_lag_structures <- brewer.pal(8, "Dark2")[c(8, 5:7)]

#########################################
# fit models:

# specify controls:
names_model_versions <- c("full", "simple_seas", "no_cross", "neither")
names_lag_structures <- c("ar1", "ar2", "geom", "pois")
cols_model_versions <- brewer.pal(8, "Dark2")[1:4]
cols_lag_structures <- brewer.pal(8, "Dark2")[c(8, 5:7)]

# define variable for Christmas breaks:
christmas <- numeric(nrow(noroBE@observed))
christmas[(seq_along(christmas) %% 52) %in% c(0, 1)] <- 1

# number of sine/cosine waves to include in the endemic and epidemic components:
S_end <- 1
S_epid <- 1

# full model (a)
ctrl_full <- list(end = list(f = addSeason2formula(~0 + fe(1, unitSpecific = TRUE) + christmas, S = S_end)),
                  ar = list(f = ~-1),
                  ne = list(f = addSeason2formula(~0 + fe(1, unitSpecific = TRUE) + christmas, S = S_epid),
                            weights = W_powerlaw(maxlag=5, normalize=TRUE, log=TRUE)),
                  family = "NegBin1",
                  subset = 6:nrow(noroBE@observed),
                  data = list(christmas = christmas))

# model (b) with simplified seasonality (no seasonality in epidemic parameters)
ctrl_simple_seas <- list(end = list(f = addSeason2formula(~0 + fe(1, unitSpecific = TRUE) + christmas, S = S_end)),
                         ar = list(f = ~-1),
                         ne = list(f = ~0 + fe(1, unitSpecific = TRUE),
                                   weights = W_powerlaw(maxlag=5, normalize=TRUE, log=TRUE)),
                         family = "NegBin1",
                         subset = 6:nrow(noroBE@observed),
                         data = list(christmas = christmas))

# model (c) without cross lags
ctrl_no_cross <- list(end = list(f = addSeason2formula(~0 + fe(1, unitSpecific = TRUE) + christmas, S = S_end)),
                      ne = list(f = ~-1),
                      ar = list(f = addSeason2formula(~0 + fe(1, unitSpecific = TRUE) + christmas, S = S_epid)),
                      family = "NegBin1",
                      subset = 6:nrow(noroBE@observed),
                      data = list(christmas = christmas))

# model (d) with neither cross lags nor seasonality in epidemic parameters
ctrl_neither <- list(end = list(f = addSeason2formula(~0 + fe(1, unitSpecific = TRUE) + christmas, S = S_end)),
                     ne = list(f = ~-1),
                     ar = list(f = ~0 + fe(1, unitSpecific = TRUE)),
                     family = "NegBin1",
                     subset = 6:nrow(noroBE@observed),
                     data = list(christmas = christmas))

controls <- list(full = ctrl_full,
                 simple_seas = ctrl_simple_seas,
                 no_cross = ctrl_no_cross,
                 neither = ctrl_neither)

# fit all models (this takes a while):
return_full_cov <- FALSE
fits <- full_cov_matrices <- list()
for(disease in names_diseases){
  print(paste("--- Started", disease, "---"))
  for(model_version in names_model_versions){
    # ar1 lag structure:
    print(paste(model_version, "ar1"))
    fits[[disease]][[model_version]]$ar1 <- hhh4(data[[disease]], controls[[model_version]])
    # ar2 lag structure:
    print(paste(model_version, "ar2"))
    control_ar2_temp <- controls[[model_version]]; control_ar2_temp$funct_lag <- ar2_lag
    fit_temp <- profile_par_lag(data[[disease]], control_ar2_temp, return_full_cov = return_full_cov)
    if(return_full_cov){
      fits[[disease]][[model_version]]$ar2 <- fit_temp$best_mod
      full_cov_matrices[[disease]][[model_version]]$ar2 <- fit_temp$cov
    }else{
      fits[[disease]][[model_version]]$ar2 <- fit_temp
    }

    # geometric lag structure:
    print(paste(model_version, "geom"))
    control_geom_temp <- controls[[model_version]]; control_geom_temp$funct_lag <- geometric_lag
    fit_temp <- profile_par_lag(data[[disease]], control_geom_temp, return_full_cov = return_full_cov)
    if(return_full_cov){
      fits[[disease]][[model_version]]$geom <- fit_temp$best_mod
      full_cov_matrices[[disease]][[model_version]]$geom <- fit_temp$cov
    }else{
      fits[[disease]][[model_version]]$geom <- fit_temp
    }


    # Poisson lag structure:
    print(paste(model_version, "pois"))
    control_pois_temp <- controls[[model_version]]; control_pois_temp$funct_lag <- poisson_lag
    fit_temp <- profile_par_lag(data[[disease]], control_pois_temp, return_full_cov = return_full_cov)
    if(return_full_cov){
      fits[[disease]][[model_version]]$pois <- fit_temp$best_mod
      full_cov_matrices[[disease]][[model_version]]$pois <- fit_temp$cov
    }else{
      fits[[disease]][[model_version]]$pois <- fit_temp
    }
  }
}

#########################################
# Assessing models fits etc.

# obtain AICs, lag weights, neighbourhood weights:
AICs <- overdisp <- neighbourhood_weights <- lag_weights <- list()

for(disease in names_diseases){
  AICs[[disease]] <- overdisp[[disease]] <-
    matrix(nrow = length(names_lag_structures), ncol = length(names_model_versions),
           dimnames = list(names_lag_structures, names_model_versions))
  neighbourhood_weights[[disease]] <- lag_weights[[disease]] <-
    array(dim = c(length(names_lag_structures), length(names_model_versions), 5),
          dimnames = list(names_lag_structures, names_model_versions, paste0("lag.", 1:5)))
  for(model_version in names_model_versions){
    for(lag_structure in names_lag_structures){
      AICs[[disease]][lag_structure, model_version] <- AIC(fits[[disease]][[model_version]][[lag_structure]])

      overdisp[[disease]][lag_structure, model_version] <-
        fits[[disease]][[model_version]][[lag_structure]]$coefficients["-log(overdisp)"]

      neighbourhood_weights[[disease]][lag_structure, model_version, ] <-
        (1:5)^-exp(fits[[disease]][[model_version]][[lag_structure]]$coefficients["neweights.d"])

      lag_weights[[disease]][lag_structure, model_version, ] <-  if(lag_structure == "ar1"){
        c(1, 0, 0, 0, 0)
      }else{
        fits[[disease]][[model_version]][[lag_structure]]$distr_lag
      }

    }
  }
}

lag_weights

Delta_AICs_noro <- AICs$noro - max(AICs$noro)
Delta_AICs_rota <- AICs$rota - max(AICs$rota)

# Observations:
# - more weight on higher lags
# - different weighting schemes now differ a bit more
# - weighted lag models borrow less strength across regions
# - slightly decreased overdispersion, i.e. less unexplained variation

#########################################
# Analyse periodically stationary moments:

# compute empirical moments:
emp_mom <- list()
emp_mom$noro <- get_emp_moments(noroBE, start = 1)
emp_mom$rota <- get_emp_moments(rotaBE, start = 1)

# compute model-based moments (both for single observations and calendar-week-wise means):
sm <- sm_of_means <- list()
for(disease in names_diseases){
  print(disease)
  for(model_version in names_model_versions){
    print(model_version)
    for(lag_structure in names_lag_structures){
      print(lag_structure)
      sm[[disease]][[model_version]][[lag_structure]] <-
        stationary_moments(fits[[disease]][[model_version]][[lag_structure]], return_Sigma = TRUE)
      sm_of_means[[disease]][[model_version]][[lag_structure]] <-
        compute_sm_of_means(fits[[disease]][[model_version]][[lag_structure]], n_seasons = 7, return_Sigma = TRUE)
    }
  }
}
# save(sm_of_means, file = "../model_fits/sm_of_means_nb1.rda") # with faster version actually no need to store this separately.
# load("sm_of_means_nb1.rda")

# plots for comparison:
disease <- "rota"
# means:
par(mfrow = 3:4)
for(unit in 1:12){
  plot_stat_means(sm, emp_mom, "rota", unit)
}

par(mfrow = 3:4)
par(mfrow = 3:4)
for(unit in 1:12){
  plot_stat_sds(sm, emp_mom, "rota", unit)
}

#########################################
# p-values

# Compute approximative p-values as given in the Supplementary Material
# (the simulation-based ones from the article were computed
# using some custom functions not included in the package and computations are quite long)
teststats <- pvals_approx_liberal <- pvals_approx_conservative <-
  teststats_trafo <- pvals_trafo_liberal <- pvals_trafo_conservative <- list()
for(disease in names_diseases){
  pvals_approx_liberal[[disease]] <- teststats[[disease]] <-
    matrix(nrow = length(names_lag_structures), ncol = length(names_model_versions),
           dimnames = list(names_lag_structures, names_model_versions))
  for(model_version in names_model_versions){
    for(lag_structure in names_lag_structures){
      testres_liberal_temp <- get_pval(stat_mom_of_means = sm_of_means[[disease]][[model_version]][[lag_structure]],
                                       emp_mom = emp_mom[[disease]],
                                       correction_df = fits[[disease]][[model_version]][[lag_structure]]$dim[1])
      pvals_approx_liberal[[disease]][lag_structure, model_version] <- testres_liberal_temp$pval
      teststats[[disease]][lag_structure, model_version] <- testres_liberal_temp$test_stat
    }
  }
}


########################################
# Analysis of conventional Pearson residuals

pearson_resids <- acfs <- list()
for(disease in names_diseases){
  for(model_version in names_model_versions){
    for(lag_structure in names_lag_structures){
      size_temp <- exp(-fits[[disease]][[model_version]][[lag_structure]]$coefficients["-log(overdisp)"])
      fitted_values_temp <- fits[[disease]][[model_version]][[lag_structure]]$fitted.values
      cond_sds_temp <- sqrt(fitted_values_temp + size_temp*fitted_values_temp^2)
      obs_values_temp <- data[[disease]]@observed[-(1:5), ]
      pearson_resids_temp <- (fitted_values_temp - obs_values_temp)/cond_sds_temp
      pearson_resids[[disease]][[lag_structure]][[model_version]] <- pearson_resids_temp
      pearson_resids[[disease]][[lag_structure]][[model_version]] <- my_acf(pearson_resids_temp)
    }
  }
}

# choose which model to display:
model_version <- "full"
cols_lag_structures <- c("black", "orange", "purple", "chartreuse3")
lwd_weights <- 2
unit1 <- "pank"
unit2 <- "span"

par(mfrow = c(2, 2))
myplot_acf(pearson_resids$noro$ar1[[model_version]], unit = unit1, main = paste("(a) Norovirus,", unit1),
           shift_x = -0.15, col = cols_lag_structures[1])
myplot_acf(pearson_resids$noro$ar2[[model_version]], unit = unit1,
           add = TRUE, shift_x = -0.05, col = cols_lag_structures[2])
myplot_acf(pearson_resids$noro$geom[[model_version]], unit = unit1,
           add = TRUE, shift_x = 0.05, col = cols_lag_structures[3])
myplot_acf(pearson_resids$noro$pois[[model_version]], unit = unit1,
           add = TRUE, shift_x = 0.15, col = cols_lag_structures[4])

myplot_acf(pearson_resids$noro$ar1[[model_version]], unit = "span", main = paste("(b) Norovirus,", unit2),
           shift_x = -0.15, col = cols_lag_structures[1])
myplot_acf(pearson_resids$noro$ar2[[model_version]], unit = "span",
           add = TRUE, shift_x = -0.05, col = cols_lag_structures[2])
myplot_acf(pearson_resids$noro$geom[[model_version]], unit = "span",
           add = TRUE, shift_x = 0.05, col = cols_lag_structures[3])
myplot_acf(pearson_resids$noro$pois[[model_version]], unit = "span",
           add = TRUE, shift_x = 0.15, col = cols_lag_structures[4])

myplot_acf(pearson_resids$rota$ar1[[model_version]], unit = unit1, main = paste("(c) Rotavirus,", unit1),
           shift_x = -0.15, col = cols_lag_structures[1])
myplot_acf(pearson_resids$rota$ar2[[model_version]], unit = unit1,
           add = TRUE, shift_x = -0.05, col = cols_lag_structures[2])
myplot_acf(pearson_resids$rota$geom[[model_version]], unit = unit1,
           add = TRUE, shift_x = 0.05, col = cols_lag_structures[3])
myplot_acf(pearson_resids$rota$pois[[model_version]], unit = unit1,
           add = TRUE, shift_x = 0.15, col = cols_lag_structures[4])

myplot_acf(pearson_resids$rota$ar1[[model_version]], unit = unit2, main = paste("(d) Rotavirus,", unit2),
           shift_x = -0.15, col = cols_lag_structures[1])
myplot_acf(pearson_resids$rota$ar2[[model_version]], unit = unit2,
           add = TRUE, shift_x = -0.05, col = cols_lag_structures[2])
myplot_acf(pearson_resids$rota$geom[[model_version]], unit = unit2,
           add = TRUE, shift_x = 0.05, col = cols_lag_structures[3])
myplot_acf(pearson_resids$rota$pois[[model_version]], unit = unit2,
           add = TRUE, shift_x = 0.15, col = cols_lag_structures[4])


##########
# New Pearson residuals relative to stationary moments
library(RColorBrewer)

# noro

par(mar = c(1, 4.5, 2, 1), font.main = 1, family = "serif", las = 1)
layout_matr <- matrix(c(1, 2,
                        1, 2,
                        1, 2,
                        3, 4,
                        3, 4,
                        3, 4,
                        5, 6,
                        5, 6,
                        5, 6,
                        5, 6#,
                        # 7, 7,
                        # 7, 7
), byrow = TRUE, ncol = 2)
layout(layout_matr)
ylim_mu <- c(0, 25)
ylim_sd <- c(0, 15)

plot_stat_means(stat_mom = sm, emp_mom = emp_mom, "noro", unit = unit1, ylim = ylim_mu)
title("(a) Pankow")
legend("top", col = c(cols_model_versions[1:4], "black"),
       legend = c("Model (a)   ", "Model (b)",
                  "Model (c)   ", "Model (d)"),
       lty = c(1, 1, 1, 1), pch = c(NA, NA, NA, NA), ncol = 2, cex = 1, bty = "n")

plot_stat_means(stat_mom = sm, emp_mom = emp_mom, "noro", unit = unit2, ylim = ylim_mu)
title("(b) Spandau")

plot_stat_sds(stat_mom = sm, emp_mom = emp_mom, "noro", unit = unit1, ylim = ylim_sd)
# mtext("(a) Pankow", line = 1.3, at = -10, cex = 0.8)
plot_stat_sds(stat_mom = sm, emp_mom = emp_mom, "noro", unit = unit2, ylim = ylim_sd)
# mtext("(b) Spandau", line = 1.3, at = -10, cex = 0.8)

par(mar = c(4.3, 4.5, 2, 1), font.main = 1, family = "serif", las = 1)
plot_stat_resids(sm_of_means, emp_mom, "noro",
                 "full", "geom",
                 "simple_seas", "geom",
                 unit1)

plot_stat_resids(sm_of_means, emp_mom, "noro",
                 "full", "geom",
                 "simple_seas", "geom",
                 unit2)

# rota:

plot_stat_means(stat_mom = sm, emp_mom = emp_mom, "rota", unit = unit1, ylim = ylim_mu)
title("(a) Pankow")
legend("topright", col = c(cols_model_versions[1:4], "black"),
       legend = c("Model (a)   ", "Model (b)",
                  "Model (c)   ", "Model (d)"),
       lty = c(1, 1, 1, 1), pch = c(NA, NA, NA, NA), ncol = 2, cex = 1, bty = "n")

plot_stat_means(stat_mom = sm, emp_mom = emp_mom, "rota", unit = unit2, ylim = ylim_mu)
title("(b) Spandau")


plot_stat_sds(stat_mom = sm, emp_mom = emp_mom, "rota", unit = unit1, ylim = ylim_sd)
# mtext("(a) Pankow", line = 1.3, at = -10, cex = 0.8)
plot_stat_sds(stat_mom = sm, emp_mom = emp_mom, "rota", unit = unit2, ylim = ylim_sd)
# mtext("(b) Spandau", line = 1.3, at = -10, cex = 0.8)

par(mar = c(4.3, 4.5, 2, 1), font.main = 1, family = "serif", las = 1)
plot_stat_resids(sm_of_means, emp_mom, "rota",
                 "full", "geom",
                 "simple_seas", "geom",
                 unit1)

plot_stat_resids(sm_of_means, emp_mom, "rota",
                 "full", "geom",
                 "simple_seas", "geom",
                 unit2)
jbracher/hhh4addon documentation built on Feb. 16, 2024, 1:45 a.m.