demo/v77i11.R

################################################################################
### Replication code from Meyer et al. (2017, JSS),
### illustrating the spatio-temporal endemic-epidemic modelling frameworks
### 'twinstim', 'twinSIR', and 'hhh4'. The full reference is:
###
### Meyer, Held, and Hoehle (2017):
### Spatio-Temporal Analysis of Epidemic Phenomena Using the R Package surveillance.
### Journal of Statistical Software, 77(11), 1-55.
### https://doi.org/10.18637/jss.v077.i11
###
### Changes to the original replication script are marked with a "#M" comment.
###
### Copyright (C) 2017 Sebastian Meyer, Leonhard Held, Michael Hoehle
###
### This file is part of the R package "surveillance",
### free software under the terms of the GNU General Public License, version 2,
### a copy of which is available at http://www.r-project.org/Licenses/.
################################################################################


################################################################################
## Section 3: Spatio-temporal point pattern of infective events
################################################################################
library("surveillance")  # you should also have installed the suggested packages

## 3.2. Data structure: 'epidataCS'
data("imdepi", package = "surveillance")
events <- SpatialPointsDataFrame(
    coords = coordinates(imdepi$events),
    data = marks(imdepi, coords = FALSE),
    proj4string = imdepi$events@proj4string  # ETRS89 projection (+units = km)
    )
stgrid <- imdepi$stgrid[,-1]
load(system.file("shapes", "districtsD.RData", package = "surveillance"))
imdepi <- as.epidataCS(events = events, W = stateD, stgrid = stgrid,
  qmatrix = diag(2), nCircle2Poly = 16)
summary(events)

.stgrid.excerpt <- format(rbind(head(stgrid, 3), tail(stgrid, 3)), digits = 3)
rbind(.stgrid.excerpt[1:3, ], "..." = "...", .stgrid.excerpt[4:6, ])

imdepi

summary(imdepi)

par(mar = c(5, 5, 1, 1), las = 1)
plot(as.stepfun(imdepi), xlim = summary(imdepi)$timeRange, xaxs = "i",
  xlab = "Time [days]", ylab = "Current number of infectives", main = "")
## axis(1, at = 2557, labels = "T", font = 2, tcl = -0.3, mgp = c(3, 0.3, 0))

par(las = 1)
plot(imdepi, "time", col = c("indianred", "darkblue"), ylim = c(0, 20))
par(mar = c(0, 0, 0, 0))
plot(imdepi, "space", lwd = 2,
  points.args = list(pch = c(1, 19), col = c("indianred", "darkblue")))
layout.scalebar(imdepi$W, scale = 100, labels = c("0", "100 km"), plot = TRUE)

## animation::saveHTML(
##   animate(subset(imdepi, type == "B"), interval = c(0, 365), time.spacing = 7),
##   nmax = Inf, interval = 0.2, loop = FALSE,
##   title = "Animation of the first year of type B events")

eventDists <- dist(coordinates(imdepi$events))
(minsep <- min(eventDists[eventDists > 0]))
set.seed(321)
imdepi_untied <- untie(imdepi, amount = list(s = minsep / 2))

imdepi_untied_infeps <- update(imdepi_untied, eps.s = Inf)

imdsts <- epidataCS2sts(imdepi, freq = 12, start = c(2002, 1), tiles = districtsD)
par(las = 1, lab = c(7, 7, 7), mar = c(5, 5, 1, 1))
plot(imdsts, type = observed ~ time)
plot(imdsts, type = observed ~ unit, population = districtsD$POPULATION / 100000)

## 3.3. Modeling and inference
(endemic <- addSeason2formula(~offset(log(popdensity)) + I(start / 365 - 3.5),
  period = 365, timevar = "start"))

imdfit_endemic <- twinstim(endemic = endemic, epidemic = ~0,
  data = imdepi_untied, subset = !is.na(agegrp))

summary(imdfit_endemic)

imdfit_Gaussian <- update(imdfit_endemic, epidemic = ~type + agegrp,
  siaf = siaf.gaussian(F.adaptive = TRUE),
  ##M set F.adaptive=TRUE for replication with surveillance >= 1.15.0
  start = c("e.(Intercept)" = -12.5, "e.siaf.1" = 2.75),
  control.siaf = list(F = list(adapt = 0.25), Deriv = list(nGQ = 13)),
  cores = 2 * (.Platform$OS.type == "unix"), model = TRUE)

print(xtable(imdfit_Gaussian,
             caption = "Estimated rate ratios (RR) and associated Wald confidence intervals (CI) for endemic (\\code{h.}) and epidemic (\\code{e.}) terms. This table was generated by \\code{xtable(imdfit\\_Gaussian)}.",
             label = "tab:imdfit_Gaussian"),
      sanitize.text.function = NULL, sanitize.colnames.function = NULL,
      sanitize.rownames.function = function(x) paste0("\\code{", x, "}"))

R0_events <- R0(imdfit_Gaussian)
tapply(R0_events, marks(imdepi_untied)[names(R0_events), "type"], mean)

imdfit_powerlaw <- update(imdfit_Gaussian, data = imdepi_untied_infeps,
  siaf = siaf.powerlaw(), control.siaf = NULL,
  start = c("e.(Intercept)" = -6.2, "e.siaf.1" = 1.5, "e.siaf.2" = 0.9))

imdfit_step4 <- update(imdfit_Gaussian, data = imdepi_untied_infeps,
  siaf = siaf.step(exp(1:4 * log(100) / 5), maxRange = 100), control.siaf = NULL,
  start = c("e.(Intercept)" = -10, setNames(-2:-5, paste0("e.siaf.", 1:4))))

par(mar = c(5, 5, 1, 1))
set.seed(2)  # Monte-Carlo confidence intervals
plot(imdfit_Gaussian, "siaf", xlim = c(0, 42), ylim = c(0, 5e-5), lty = c(1, 3),
     xlab = expression("Distance " * x * " from host [km]"))
plot(imdfit_powerlaw, "siaf", add = TRUE, col.estimate = 4, lty = c(2, 3))
plot(imdfit_step4, "siaf", add = TRUE, col.estimate = 3, lty = c(4, 3))
legend("topright", legend = c("Power law", "Step (df = 4)", "Gaussian"),
       col = c(4, 3, 2), lty = c(2, 4, 1), lwd = 3, bty = "n")

AIC(imdfit_endemic, imdfit_Gaussian, imdfit_powerlaw, imdfit_step4)

## Example of AIC-based stepwise selection of the endemic model
imdfit_endemic_sel <- stepComponent(imdfit_endemic, component = "endemic")
## -> none of the endemic predictors is removed from the model

par(mar = c(5, 5, 1, 1), las = 1)
intensity_endprop <- intensityplot(imdfit_powerlaw, aggregate = "time",
                                   which = "endemic proportion", plot = FALSE)
intensity_total <- intensityplot(imdfit_powerlaw, aggregate = "time",
                                 which = "total", tgrid = 501, lwd = 2,
                                 xlab = "Time [days]", ylab = "Intensity")
curve(intensity_endprop(x) * intensity_total(x), add = TRUE, col = 2, lwd = 2, n = 501)
## curve(intensity_endprop(x), add = TRUE, col = 2, lty = 2, n = 501)
text(2500, 0.36, labels = "total", col = 1, pos = 2, font = 2)
text(2500, 0.08, labels = "endemic", col = 2, pos = 2, font = 2)

## meanepiprop <- integrate(intensityplot(imdfit_powerlaw, which = "epidemic proportion"),
##                          50, 2450, subdivisions = 2000, rel.tol = 1e-3)$value / 2400

for (.type in 1:2) {
    print(intensityplot(imdfit_powerlaw, aggregate = "space", which = "epidemic proportion",
                        types = .type, tiles = districtsD, sgrid = 5000,
                        col.regions = grey(seq(1,0,length.out = 10)), at = seq(0,1,by = 0.1)))
    grid::grid.text("Epidemic proportion", x = 1, rot = 90, vjust = -1)
}

par(mar = c(5, 5, 1, 1))
checkResidualProcess(imdfit_powerlaw)

## 3.4. Simulation
imdsims <- simulate(imdfit_powerlaw, nsim = 30, seed = 1, t0 = 1826, T = 2555,
  data = imdepi_untied_infeps, tiles = districtsD)

table(imdsims[[1]]$events$source > 0, exclude = NULL)

.t0 <- imdsims[[1]]$timeRange[1]
.cumoffset <- c(table(subset(imdepi, time < .t0)$events$type))
par(mar = c(5, 5, 1, 1), las = 1)
plot(imdepi, ylim = c(0, 20), col = c("indianred", "darkblue"),
     subset = time < .t0, cumulative = list(maxat = 336),
     xlab = "Time [days]")
for (i in seq_along(imdsims$eventsList))
    plot(imdsims[[i]], add = TRUE, legend.types = FALSE,
         col = scales::alpha(c("indianred", "darkblue"), 0.5),
         subset = !is.na(source),  # exclude events of the prehistory
         cumulative = list(offset = .cumoffset, maxat = 336, axis = FALSE),
         border = NA, density = 0) # no histogram for simulations
plot(imdepi, add = TRUE, legend.types = FALSE,
     col = 1, subset = time >= .t0,
     cumulative = list(offset = .cumoffset, maxat = 336, axis = FALSE),
     border = NA, density = 0) # no histogram for the last year's data
abline(v = .t0, lty = 2, lwd = 2)

################################################################################
## Section 4: SIR event history of a fixed population
################################################################################
library("surveillance")  # you should also have installed the suggested packages

## 4.2. Data structure: 'epidata'
data("hagelloch", package = "surveillance")
head(hagelloch.df, n = 5)

hagelloch <- as.epidata(hagelloch.df,
  t0 = 0, tI.col = "tI", tR.col = "tR",
  id.col = "PN", coords.cols = c("x.loc", "y.loc"),
  f = list(household    = function(u) u == 0,
           nothousehold = function(u) u > 0),
  w = list(c1 = function (CL.i, CL.j) CL.i == "1st class" & CL.j == CL.i,
           c2 = function (CL.i, CL.j) CL.i == "2nd class" & CL.j == CL.i),
  keep.cols = c("SEX", "AGE", "CL"))
head(hagelloch, n = 5)

par(mar = c(5, 5, 1, 1))
plot(hagelloch, xlab = "Time [days]")

par(mar = c(5, 5, 1, 1))
hagelloch_coords <- summary(hagelloch)$coordinates
plot(hagelloch_coords, xlab = "x [m]", ylab = "y [m]",
  pch = 15, asp = 1, cex = sqrt(multiplicity(hagelloch_coords)))
legend(x = "topleft", pch = 15, legend = c(1, 4, 8), pt.cex = sqrt(c(1, 4, 8)),
  title = "Household size")

## 4.3. Modeling and inference
hagellochFit <- twinSIR(~household + c1 + c2 + nothousehold, data = hagelloch)
summary(hagellochFit)

exp(confint(hagellochFit, parm = "cox(logbaseline)"))

prof <- profile(hagellochFit,
  list(c(match("c1", names(coef(hagellochFit))), NA, NA, 25),
       c(match("c2", names(coef(hagellochFit))), NA, NA, 25)))
prof$ci.hl

plot(prof)

par(mar = c(5, 5, 1, 1))
plot(hagellochFit, which = "epidemic proportion", xlab = "time [days]")
checkResidualProcess(hagellochFit, plot = 1)

knots <- c(100, 200)
fstep <- list(
  B1 = function(D) D > 0 & D < knots[1],
  B2 = function(D) D >= knots[1] & D < knots[2],
  B3 = function(D) D >= knots[2])
hagellochFit_fstep <- twinSIR(
  ~household + c1 + c2 + B1 + B2 + B3,
  data = update(hagelloch, f = fstep))

set.seed(1)
AIC(hagellochFit, hagellochFit_fstep)

################################################################################
## Section 5. Areal time series of counts
################################################################################
library("surveillance")  # you should also have installed the suggested packages

## 5.2. Data structure: 'sts'
## extract components from measlesWeserEms to reconstruct
data("measlesWeserEms", package = "surveillance")
counts <- observed(measlesWeserEms)
map <- measlesWeserEms@map
populationFrac <- measlesWeserEms@populationFrac

weserems_nbOrder <- nbOrder(poly2adjmat(map), maxlag = 10)

measlesWeserEms <- sts(observed = counts, start = c(2001, 1), frequency = 52,
  neighbourhood = weserems_nbOrder, map = map, population = populationFrac)

plot(measlesWeserEms, type = observed ~ time)
plot(measlesWeserEms, type = observed ~ unit,
  population = measlesWeserEms@map$POPULATION / 100000,
  labels = list(font = 2), colorkey = list(space = "right"),
  sp.layout = layout.scalebar(measlesWeserEms@map, corner = c(0.05, 0.05),
    scale = 50, labels = c("0", "50 km"), height = 0.03))

plot(measlesWeserEms, units = which(colSums(observed(measlesWeserEms)) > 0))

## animation::saveHTML(
##   animate(measlesWeserEms, tps = 1:52, total.args = list()),
##   title = "Evolution of the measles epidemic in the Weser-Ems region, 2001",
##   ani.width = 500, ani.height = 600)

## ## to perform the following analysis using biweekly aggregated measles counts:
## measlesWeserEms <- aggregate(measlesWeserEms, by = "time", nfreq = 26)

## 5.3. Modeling and inference
measlesModel_basic <- list(
  end = list(f = addSeason2formula(~1 + t, period = measlesWeserEms@freq),
             offset = population(measlesWeserEms)),
  ar = list(f = ~1),
  ne = list(f = ~1, weights = neighbourhood(measlesWeserEms) == 1),
  family = "NegBin1")

measlesFit_basic <- hhh4(stsObj = measlesWeserEms, control = measlesModel_basic)
summary(measlesFit_basic, idx2Exp = TRUE, amplitudeShift = TRUE, maxEV = TRUE)

plot(measlesFit_basic, type = "season", components = "end", main = "")

confint(measlesFit_basic, parm = "overdisp")

AIC(measlesFit_basic, update(measlesFit_basic, family = "Poisson"))

districts2plot <- which(colSums(observed(measlesWeserEms)) > 20)
plot(measlesFit_basic, type = "fitted", units = districts2plot, hide0s = TRUE)

Sprop <- matrix(1 - measlesWeserEms@map@data$vacc1.2004,
  nrow = nrow(measlesWeserEms), ncol = ncol(measlesWeserEms), byrow = TRUE)
summary(Sprop[1, ])

Soptions <- c("unchanged", "Soffset", "Scovar")
SmodelGrid <- expand.grid(end = Soptions, ar = Soptions)
row.names(SmodelGrid) <- do.call("paste", c(SmodelGrid, list(sep = "|")))

measlesFits_vacc <- apply(X = SmodelGrid, MARGIN = 1, FUN = function (options) {
  updatecomp <- function (comp, option) switch(option,
    "unchanged" = list(),
    "Soffset" = list(offset = comp$offset * Sprop),
    "Scovar" = list(f = update(comp$f, ~. + log(Sprop))))
  update(measlesFit_basic,
    end = updatecomp(measlesFit_basic$control$end, options[1]),
    ar = updatecomp(measlesFit_basic$control$ar, options[2]),
    data = list(Sprop = Sprop))
  })

aics_vacc <- do.call(AIC, lapply(names(measlesFits_vacc), as.name),
  envir = as.environment(measlesFits_vacc))
aics_vacc[order(aics_vacc[, "AIC"]), ]

measlesFit_vacc <- measlesFits_vacc[["Scovar|unchanged"]]
coef(measlesFit_vacc, se = TRUE)["end.log(Sprop)", ]

measlesFit_nepop <- update(measlesFit_vacc,
  ne = list(f = ~log(pop)), data = list(pop = population(measlesWeserEms)))
measlesFit_powerlaw <- update(measlesFit_nepop,
  ne = list(weights = W_powerlaw(maxlag = 5)))
measlesFit_np2 <- update(measlesFit_nepop,
  ne = list(weights = W_np(maxlag = 2)))

library("lattice")
trellis.par.set("reference.line", list(lwd = 3, col="gray"))
trellis.par.set("fontsize", list(text = 14))
plot(measlesFit_powerlaw, type = "neweights", plotter = stripplot,
  panel = function (...) {panel.stripplot(...); panel.average(...)},
  jitter.data = TRUE, xlab = expression(o[ji]), ylab = expression(w[ji]))
## non-normalized weights (power law and unconstrained second-order weight)
local({
    colPL <- "#0080ff"
    ogrid <- 1:5
    par(mar = c(3.6, 4, 2.2, 2), mgp = c(2.1, 0.8, 0))
    plot(ogrid, ogrid^-coef(measlesFit_powerlaw)["neweights.d"], col = colPL,
         xlab = "Adjacency order", ylab = "Non-normalized weight", type = "b", lwd = 2)
    matlines(t(sapply(ogrid, function (x)
                      x^-confint(measlesFit_powerlaw, parm = "neweights.d"))),
             type = "l", lty = 2, col = colPL)
    w2 <- exp(c(coef(measlesFit_np2)["neweights.d"],
                 confint(measlesFit_np2, parm = "neweights.d")))
    lines(ogrid, c(1, w2[1], 0, 0, 0), type = "b", pch = 19, lwd = 2)
    arrows(x0 = 2, y0 = w2[2], y1 = w2[3], length = 0.1, angle = 90, code = 3, lty = 2)
    legend("topright", col = c(colPL, 1), pch = c(1, 19), lwd = 2, bty = "n",
           inset = 0.1, y.intersp = 1.5,
           legend = c("Power-law model", "Second-order model"))
})

AIC(measlesFit_nepop, measlesFit_powerlaw, measlesFit_np2)

measlesFit_ri <- update(measlesFit_powerlaw,
  end = list(f = update(formula(measlesFit_powerlaw)$end, ~. + ri() - 1)),
  ar  = list(f = update(formula(measlesFit_powerlaw)$ar,  ~. + ri() - 1)),
  ne  = list(f = update(formula(measlesFit_powerlaw)$ne,  ~. + ri() - 1)))
summary(measlesFit_ri, amplitudeShift = TRUE, maxEV = TRUE)

head(ranef(measlesFit_ri, tomatrix = TRUE), n = 3)

stopifnot(ranef(measlesFit_ri) > -1.6, ranef(measlesFit_ri) < 1.6)
for (comp in c("ar", "ne", "end")) {
  print(plot(measlesFit_ri, type = "ri", component = comp,
    col.regions = rev(cm.colors(100)), labels = list(cex = 0.6),
    at = seq(-1.6, 1.6, length.out = 15)))
}

plot(measlesFit_ri, type = "fitted", units = districts2plot, hide0s = TRUE)
plot(measlesFit_ri, type = "maps", prop = TRUE,
  labels = list(font = 2, cex = 0.6))

tp <- c(65, 77)
models2compare <- paste0("measlesFit_", c("basic", "powerlaw", "ri"))
measlesPreds1 <- lapply(mget(models2compare), oneStepAhead,
  tp = tp, type = "final")

stopifnot(all.equal(measlesPreds1$measlesFit_powerlaw$pred,
                    fitted(measlesFit_powerlaw)[tp[1]:tp[2], ],
                    check.attributes = FALSE))

stopifnot(identical(
    measlesFit_powerlaw$loglikelihood,
    -sum(scores(oneStepAhead(measlesFit_powerlaw, tp = 1, type = "final"),
                which = "logs", individual = TRUE))))

SCORES <- c("logs", "rps", "dss", "ses")
measlesScores1 <- lapply(measlesPreds1, scores, which = SCORES, individual = TRUE)
t(sapply(measlesScores1, colMeans, dims = 2))

measlesPreds2 <- lapply(mget(models2compare), oneStepAhead,
  tp = tp, type = "rolling", which.start = "final",
  cores = 2 * (.Platform$OS.type == "unix"))
measlesScores2 <- lapply(measlesPreds2, scores, which = SCORES, individual = TRUE)
t(sapply(measlesScores2, colMeans, dims = 2))

set.seed(321)
sapply(SCORES, function (score) permutationTest(
  measlesScores2$measlesFit_ri[, , score],
  measlesScores2$measlesFit_basic[, , score]))

calibrationTest(measlesPreds2[["measlesFit_ri"]], which = "rps")

par(mfrow = sort(n2mfrow(length(measlesPreds2))), mar = c(4.5, 4.5, 3, 1))
for (m in models2compare)
  pit(measlesPreds2[[m]], plot = list(ylim = c(0, 1.25), main = m))

## 5.4. Simulation
(y.start <- observed(measlesWeserEms)[52, ])
measlesSim <- simulate(measlesFit_ri,
  nsim = 100, seed = 1, subset = 53:104, y.start = y.start)
summary(colSums(measlesSim, dims = 2))

par(las = 1, mar = c(5, 5, 1, 1))
plot(measlesSim, "time", ylim = c(0, 100))

Try the surveillance package in your browser

Any scripts or data that you put into this service are public.

surveillance documentation built on Oct. 9, 2017, 3:02 p.m.