##### This file serves to reproduce parts of the analysis from
##### Bracher/Held (2018+): Multivariate endemic-epidemic models
##### with higher-order lags and an application to outbreak detection
# install the package hhh4addon:
# library(devtools)
# install_github("jbracher/hhh4addon", build_vignettes = TRUE)
#########################################
# get and format data:
library(surveillance)
library(hhh4addon)
library(RColorBrewer)
setwd("/home/johannes/Documents/hhh4predict/Theory/Article_Theory/data_analysis")
# The file auxiliary functions.R needs to be available (equally available in example_analyses.R)
source("auxiliary_functions.R")
library(hhh4contacts)
data("noroBE")
data("rotaBE")
# modify neighbourhood matrices to apply power law:
noroBE@neighbourhood <- noroBE@neighbourhood + 1
rotaBE@neighbourhood <- rotaBE@neighbourhood + 1
data <- list(noro = noroBE, rota = rotaBE)
names_diseases <- names(data)
library(RColorBrewer)
cols_model_versions <- brewer.pal(8, "Dark2")[1:4]
cols_lag_structures <- brewer.pal(8, "Dark2")[c(8, 5:7)]
#########################################
# fit models:
# specify controls:
names_model_versions <- c("full", "simple_seas", "no_cross", "neither")
names_lag_structures <- c("ar1", "ar2", "geom", "pois")
cols_model_versions <- brewer.pal(8, "Dark2")[1:4]
cols_lag_structures <- brewer.pal(8, "Dark2")[c(8, 5:7)]
# define variable for Christmas breaks:
christmas <- numeric(nrow(noroBE@observed))
christmas[(seq_along(christmas) %% 52) %in% c(0, 1)] <- 1
# number of sine/cosine waves to include in the endemic and epidemic components:
S_end <- 1
S_epid <- 1
# full model (a)
ctrl_full <- list(end = list(f = addSeason2formula(~0 + fe(1, unitSpecific = TRUE) + christmas, S = S_end)),
ar = list(f = ~-1),
ne = list(f = addSeason2formula(~0 + fe(1, unitSpecific = TRUE) + christmas, S = S_epid),
weights = W_powerlaw(maxlag=5, normalize=TRUE, log=TRUE)),
family = "NegBin1",
subset = 6:nrow(noroBE@observed),
data = list(christmas = christmas))
# model (b) with simplified seasonality (no seasonality in epidemic parameters)
ctrl_simple_seas <- list(end = list(f = addSeason2formula(~0 + fe(1, unitSpecific = TRUE) + christmas, S = S_end)),
ar = list(f = ~-1),
ne = list(f = ~0 + fe(1, unitSpecific = TRUE),
weights = W_powerlaw(maxlag=5, normalize=TRUE, log=TRUE)),
family = "NegBin1",
subset = 6:nrow(noroBE@observed),
data = list(christmas = christmas))
# model (c) without cross lags
ctrl_no_cross <- list(end = list(f = addSeason2formula(~0 + fe(1, unitSpecific = TRUE) + christmas, S = S_end)),
ne = list(f = ~-1),
ar = list(f = addSeason2formula(~0 + fe(1, unitSpecific = TRUE) + christmas, S = S_epid)),
family = "NegBin1",
subset = 6:nrow(noroBE@observed),
data = list(christmas = christmas))
# model (d) with neither cross lags nor seasonality in epidemic parameters
ctrl_neither <- list(end = list(f = addSeason2formula(~0 + fe(1, unitSpecific = TRUE) + christmas, S = S_end)),
ne = list(f = ~-1),
ar = list(f = ~0 + fe(1, unitSpecific = TRUE)),
family = "NegBin1",
subset = 6:nrow(noroBE@observed),
data = list(christmas = christmas))
controls <- list(full = ctrl_full,
simple_seas = ctrl_simple_seas,
no_cross = ctrl_no_cross,
neither = ctrl_neither)
# fit all models (this takes a while):
return_full_cov <- FALSE
fits <- full_cov_matrices <- list()
for(disease in names_diseases){
print(paste("--- Started", disease, "---"))
for(model_version in names_model_versions){
# ar1 lag structure:
print(paste(model_version, "ar1"))
fits[[disease]][[model_version]]$ar1 <- hhh4(data[[disease]], controls[[model_version]])
# ar2 lag structure:
print(paste(model_version, "ar2"))
control_ar2_temp <- controls[[model_version]]; control_ar2_temp$funct_lag <- ar2_lag
fit_temp <- profile_par_lag(data[[disease]], control_ar2_temp, return_full_cov = return_full_cov)
if(return_full_cov){
fits[[disease]][[model_version]]$ar2 <- fit_temp$best_mod
full_cov_matrices[[disease]][[model_version]]$ar2 <- fit_temp$cov
}else{
fits[[disease]][[model_version]]$ar2 <- fit_temp
}
# geometric lag structure:
print(paste(model_version, "geom"))
control_geom_temp <- controls[[model_version]]; control_geom_temp$funct_lag <- geometric_lag
fit_temp <- profile_par_lag(data[[disease]], control_geom_temp, return_full_cov = return_full_cov)
if(return_full_cov){
fits[[disease]][[model_version]]$geom <- fit_temp$best_mod
full_cov_matrices[[disease]][[model_version]]$geom <- fit_temp$cov
}else{
fits[[disease]][[model_version]]$geom <- fit_temp
}
# Poisson lag structure:
print(paste(model_version, "pois"))
control_pois_temp <- controls[[model_version]]; control_pois_temp$funct_lag <- poisson_lag
fit_temp <- profile_par_lag(data[[disease]], control_pois_temp, return_full_cov = return_full_cov)
if(return_full_cov){
fits[[disease]][[model_version]]$pois <- fit_temp$best_mod
full_cov_matrices[[disease]][[model_version]]$pois <- fit_temp$cov
}else{
fits[[disease]][[model_version]]$pois <- fit_temp
}
}
}
#########################################
# Assessing models fits etc.
# obtain AICs, lag weights, neighbourhood weights:
AICs <- overdisp <- neighbourhood_weights <- lag_weights <- list()
for(disease in names_diseases){
AICs[[disease]] <- overdisp[[disease]] <-
matrix(nrow = length(names_lag_structures), ncol = length(names_model_versions),
dimnames = list(names_lag_structures, names_model_versions))
neighbourhood_weights[[disease]] <- lag_weights[[disease]] <-
array(dim = c(length(names_lag_structures), length(names_model_versions), 5),
dimnames = list(names_lag_structures, names_model_versions, paste0("lag.", 1:5)))
for(model_version in names_model_versions){
for(lag_structure in names_lag_structures){
AICs[[disease]][lag_structure, model_version] <- AIC(fits[[disease]][[model_version]][[lag_structure]])
overdisp[[disease]][lag_structure, model_version] <-
fits[[disease]][[model_version]][[lag_structure]]$coefficients["-log(overdisp)"]
neighbourhood_weights[[disease]][lag_structure, model_version, ] <-
(1:5)^-exp(fits[[disease]][[model_version]][[lag_structure]]$coefficients["neweights.d"])
lag_weights[[disease]][lag_structure, model_version, ] <- if(lag_structure == "ar1"){
c(1, 0, 0, 0, 0)
}else{
fits[[disease]][[model_version]][[lag_structure]]$distr_lag
}
}
}
}
lag_weights
Delta_AICs_noro <- AICs$noro - max(AICs$noro)
Delta_AICs_rota <- AICs$rota - max(AICs$rota)
# Observations:
# - more weight on higher lags
# - different weighting schemes now differ a bit more
# - weighted lag models borrow less strength across regions
# - slightly decreased overdispersion, i.e. less unexplained variation
#########################################
# Analyse periodically stationary moments:
# compute empirical moments:
emp_mom <- list()
emp_mom$noro <- get_emp_moments(noroBE, start = 1)
emp_mom$rota <- get_emp_moments(rotaBE, start = 1)
# compute model-based moments (both for single observations and calendar-week-wise means):
sm <- sm_of_means <- list()
for(disease in names_diseases){
print(disease)
for(model_version in names_model_versions){
print(model_version)
for(lag_structure in names_lag_structures){
print(lag_structure)
sm[[disease]][[model_version]][[lag_structure]] <-
stationary_moments(fits[[disease]][[model_version]][[lag_structure]], return_Sigma = TRUE)
sm_of_means[[disease]][[model_version]][[lag_structure]] <-
compute_sm_of_means(fits[[disease]][[model_version]][[lag_structure]], n_seasons = 7, return_Sigma = TRUE)
}
}
}
# save(sm_of_means, file = "../model_fits/sm_of_means_nb1.rda") # with faster version actually no need to store this separately.
# load("sm_of_means_nb1.rda")
# plots for comparison:
disease <- "rota"
# means:
par(mfrow = 3:4)
for(unit in 1:12){
plot_stat_means(sm, emp_mom, "rota", unit)
}
par(mfrow = 3:4)
par(mfrow = 3:4)
for(unit in 1:12){
plot_stat_sds(sm, emp_mom, "rota", unit)
}
#########################################
# p-values
# Compute approximative p-values as given in the Supplementary Material
# (the simulation-based ones from the article were computed
# using some custom functions not included in the package and computations are quite long)
teststats <- pvals_approx_liberal <- pvals_approx_conservative <-
teststats_trafo <- pvals_trafo_liberal <- pvals_trafo_conservative <- list()
for(disease in names_diseases){
pvals_approx_liberal[[disease]] <- teststats[[disease]] <-
matrix(nrow = length(names_lag_structures), ncol = length(names_model_versions),
dimnames = list(names_lag_structures, names_model_versions))
for(model_version in names_model_versions){
for(lag_structure in names_lag_structures){
testres_liberal_temp <- get_pval(stat_mom_of_means = sm_of_means[[disease]][[model_version]][[lag_structure]],
emp_mom = emp_mom[[disease]],
correction_df = fits[[disease]][[model_version]][[lag_structure]]$dim[1])
pvals_approx_liberal[[disease]][lag_structure, model_version] <- testres_liberal_temp$pval
teststats[[disease]][lag_structure, model_version] <- testres_liberal_temp$test_stat
}
}
}
########################################
# Analysis of conventional Pearson residuals
pearson_resids <- acfs <- list()
for(disease in names_diseases){
for(model_version in names_model_versions){
for(lag_structure in names_lag_structures){
size_temp <- exp(-fits[[disease]][[model_version]][[lag_structure]]$coefficients["-log(overdisp)"])
fitted_values_temp <- fits[[disease]][[model_version]][[lag_structure]]$fitted.values
cond_sds_temp <- sqrt(fitted_values_temp + size_temp*fitted_values_temp^2)
obs_values_temp <- data[[disease]]@observed[-(1:5), ]
pearson_resids_temp <- (fitted_values_temp - obs_values_temp)/cond_sds_temp
pearson_resids[[disease]][[lag_structure]][[model_version]] <- pearson_resids_temp
pearson_resids[[disease]][[lag_structure]][[model_version]] <- my_acf(pearson_resids_temp)
}
}
}
# choose which model to display:
model_version <- "full"
cols_lag_structures <- c("black", "orange", "purple", "chartreuse3")
lwd_weights <- 2
unit1 <- "pank"
unit2 <- "span"
par(mfrow = c(2, 2))
myplot_acf(pearson_resids$noro$ar1[[model_version]], unit = unit1, main = paste("(a) Norovirus,", unit1),
shift_x = -0.15, col = cols_lag_structures[1])
myplot_acf(pearson_resids$noro$ar2[[model_version]], unit = unit1,
add = TRUE, shift_x = -0.05, col = cols_lag_structures[2])
myplot_acf(pearson_resids$noro$geom[[model_version]], unit = unit1,
add = TRUE, shift_x = 0.05, col = cols_lag_structures[3])
myplot_acf(pearson_resids$noro$pois[[model_version]], unit = unit1,
add = TRUE, shift_x = 0.15, col = cols_lag_structures[4])
myplot_acf(pearson_resids$noro$ar1[[model_version]], unit = "span", main = paste("(b) Norovirus,", unit2),
shift_x = -0.15, col = cols_lag_structures[1])
myplot_acf(pearson_resids$noro$ar2[[model_version]], unit = "span",
add = TRUE, shift_x = -0.05, col = cols_lag_structures[2])
myplot_acf(pearson_resids$noro$geom[[model_version]], unit = "span",
add = TRUE, shift_x = 0.05, col = cols_lag_structures[3])
myplot_acf(pearson_resids$noro$pois[[model_version]], unit = "span",
add = TRUE, shift_x = 0.15, col = cols_lag_structures[4])
myplot_acf(pearson_resids$rota$ar1[[model_version]], unit = unit1, main = paste("(c) Rotavirus,", unit1),
shift_x = -0.15, col = cols_lag_structures[1])
myplot_acf(pearson_resids$rota$ar2[[model_version]], unit = unit1,
add = TRUE, shift_x = -0.05, col = cols_lag_structures[2])
myplot_acf(pearson_resids$rota$geom[[model_version]], unit = unit1,
add = TRUE, shift_x = 0.05, col = cols_lag_structures[3])
myplot_acf(pearson_resids$rota$pois[[model_version]], unit = unit1,
add = TRUE, shift_x = 0.15, col = cols_lag_structures[4])
myplot_acf(pearson_resids$rota$ar1[[model_version]], unit = unit2, main = paste("(d) Rotavirus,", unit2),
shift_x = -0.15, col = cols_lag_structures[1])
myplot_acf(pearson_resids$rota$ar2[[model_version]], unit = unit2,
add = TRUE, shift_x = -0.05, col = cols_lag_structures[2])
myplot_acf(pearson_resids$rota$geom[[model_version]], unit = unit2,
add = TRUE, shift_x = 0.05, col = cols_lag_structures[3])
myplot_acf(pearson_resids$rota$pois[[model_version]], unit = unit2,
add = TRUE, shift_x = 0.15, col = cols_lag_structures[4])
##########
# New Pearson residuals relative to stationary moments
library(RColorBrewer)
# noro
par(mar = c(1, 4.5, 2, 1), font.main = 1, family = "serif", las = 1)
layout_matr <- matrix(c(1, 2,
1, 2,
1, 2,
3, 4,
3, 4,
3, 4,
5, 6,
5, 6,
5, 6,
5, 6#,
# 7, 7,
# 7, 7
), byrow = TRUE, ncol = 2)
layout(layout_matr)
ylim_mu <- c(0, 25)
ylim_sd <- c(0, 15)
plot_stat_means(stat_mom = sm, emp_mom = emp_mom, "noro", unit = unit1, ylim = ylim_mu)
title("(a) Pankow")
legend("top", col = c(cols_model_versions[1:4], "black"),
legend = c("Model (a) ", "Model (b)",
"Model (c) ", "Model (d)"),
lty = c(1, 1, 1, 1), pch = c(NA, NA, NA, NA), ncol = 2, cex = 1, bty = "n")
plot_stat_means(stat_mom = sm, emp_mom = emp_mom, "noro", unit = unit2, ylim = ylim_mu)
title("(b) Spandau")
plot_stat_sds(stat_mom = sm, emp_mom = emp_mom, "noro", unit = unit1, ylim = ylim_sd)
# mtext("(a) Pankow", line = 1.3, at = -10, cex = 0.8)
plot_stat_sds(stat_mom = sm, emp_mom = emp_mom, "noro", unit = unit2, ylim = ylim_sd)
# mtext("(b) Spandau", line = 1.3, at = -10, cex = 0.8)
par(mar = c(4.3, 4.5, 2, 1), font.main = 1, family = "serif", las = 1)
plot_stat_resids(sm_of_means, emp_mom, "noro",
"full", "geom",
"simple_seas", "geom",
unit1)
plot_stat_resids(sm_of_means, emp_mom, "noro",
"full", "geom",
"simple_seas", "geom",
unit2)
# rota:
plot_stat_means(stat_mom = sm, emp_mom = emp_mom, "rota", unit = unit1, ylim = ylim_mu)
title("(a) Pankow")
legend("topright", col = c(cols_model_versions[1:4], "black"),
legend = c("Model (a) ", "Model (b)",
"Model (c) ", "Model (d)"),
lty = c(1, 1, 1, 1), pch = c(NA, NA, NA, NA), ncol = 2, cex = 1, bty = "n")
plot_stat_means(stat_mom = sm, emp_mom = emp_mom, "rota", unit = unit2, ylim = ylim_mu)
title("(b) Spandau")
plot_stat_sds(stat_mom = sm, emp_mom = emp_mom, "rota", unit = unit1, ylim = ylim_sd)
# mtext("(a) Pankow", line = 1.3, at = -10, cex = 0.8)
plot_stat_sds(stat_mom = sm, emp_mom = emp_mom, "rota", unit = unit2, ylim = ylim_sd)
# mtext("(b) Spandau", line = 1.3, at = -10, cex = 0.8)
par(mar = c(4.3, 4.5, 2, 1), font.main = 1, family = "serif", las = 1)
plot_stat_resids(sm_of_means, emp_mom, "rota",
"full", "geom",
"simple_seas", "geom",
unit1)
plot_stat_resids(sm_of_means, emp_mom, "rota",
"full", "geom",
"simple_seas", "geom",
unit2)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.