library(mvtnorm)
library(coda)
library(purrrlyr) # included in tidyverse?
myColours <- c("26 107 133", "241 214 118", "168 45 23")
ECDCcol <- sapply(strsplit(myColours, " "), function(x)
rgb(x[1], x[2], x[3], maxColorValue=255)) # convert to hexadecimal
colours1 <- colorRampPalette(ECDCcol)(5)
####################
## Initialisation ##
####################
## Helen, tidy this up!
rij <- contact_w # country-specific contact matrix
N <- sum(popSize) # population size
D <- 6 / 365 # duration of infection?!
A <- 0.5 # duration of maternally-derived immunity
propFac <- "extloglin" # type of proportionality factor
Lmax <- 70
muy <- predict(demfit, type = "response")
muy <- muy[1 : Lmax] # Ensure not longer than life expectancy
My <- exp(- cumsum(muy)) # Type I mortality
L <- Lmax * mean(My) # Life expectancy
My <- My[1 : Lmax] # Ensure no longer than life expectancy
## Group fixed parameters
otherParams <- list(rij = contact_w, # country-specific contact matrix
N = nrow(seroData), # sum(popSize), # population size
D = 6 / 365, # duration of infection?!
A = 0.5, # duration of maternally-derived immunity
propFac = "constant", # type of proportionality factor
Lmax = 70 # life expectancy
)
#####################################
## Estimate force of infection ##
## and calculate associated R0, R, ##
## prevalence and log-likelihood ##
#####################################
FoI <- function(fitParams, otherParams, seroData){
# Prepare serological data
age <- seroData$AGE
obsData <- seroData$indic
## Generate transmission matrix
if(propFac == "constant"){
if(length(fitParams) != 1){
stop("length of param0 does not fit choice of constant proportionality factor")
}
# Create transmission matrix with constant parameter
qij <- exp(fitParams[1])
bij <- 365 * qij * (rij)[1 : Lmax, 1 : Lmax]
}
if(propFac == "loglin"){ # First option (age-dependent susceptibility)
# Check start parameters fit with prop. factor type
if(length(fitParams) != 2){
stop("length of param0 does not fit choice of proportionality factor")
}
# Create transmission matrix with log-linear parameters
qFunction <- function(x, obsData){exp(fitParams[1] + (fitParams[2] * x))}
qij <- outer(c(1 : Lmax), c(1 : Lmax), qFunction)
bij <- 365 * qij * rij[1 : Lmax, 1 : Lmax]
}
if(propFac == "extloglin"){
if(length(fitParams) != 3){
stop("length of param0 does not fit choice of proportionality factor")
}
# Create transmission matrix with extended log-linear parameters
q.f <- function(x, y){exp(fitParams[1] + fitParams[2] * x + fitParams[3] * y)}
qij <- outer(c(1 : Lmax), c(1 : Lmax), q.f)
bij <- 365 * qij * (rij)[1 : Lmax, 1 : Lmax]
}
if(sum(is.na(bij)) > 0){warning("There are missing values in the transmission matrix beta_ij, which may break the tolerance parameter in the optimisation")}
## Initialise for iterative estimation of force of infection
foiiprev <- rep(0.01, Lmax)
tol <- 1 # initialise tolerance
it <- 0 # iterator
## Iterate to estimate force of infection
while((tol > 1e-10) & (it < 2000)){
# Social contact FOI
# lambda(a) is calculated based on beta(a, a') and I(a', t)
foii <- (N / L) * D * bij %*%
(as.matrix(foiiprev / (foiiprev + muy)) *
matrix(c(1 - exp(- (1 - A) * (foiiprev[1] + muy[1])),
exp(- (1 - A) * (foiiprev[1] + muy[1]) -
c(0, cumsum(foiiprev[- 1] + muy[- 1])[1 : (Lmax - 2)])) -
exp(- (1 - A) * (foiiprev[1] + muy[1]) - c(cumsum(foiiprev[- 1] + muy[- 1])))), ncol = 1))
foii <- apply(cbind(0, foii), 1, max) # Bigger than 0
foii <- apply(cbind(1, foii), 1, min) # Smaller than 1
tol <- sum((foii - foiiprev) ^ 2) # Tolerance is sum of squared difference
it <- it + 1
foiiprev <- foii
}
## Initialise modelled prevalence
prev <- rep(NA, length(age))
ll <- rep(NA, length(age)) # initialise log-like
## Estimate seroprevalence from fitted FoI model
for(k in 1 : length(age)){
prev[k] <- (1 - exp(c(- (age[k] - A) * foii[1],
- (1 - A) * foii[1] - cumsum(c(0, foii[- 1]))
- (foii[- 1])[floor(age[k])] * (age[k] - floor(age[k])))))[floor(age[k]) + 1]
# Bernoulli loglikelihood used to estimate FOI
ll[k] <- obsData[k] * log(prev[k]) + (1 - obsData[k]) * log(1 - prev[k])
}
## Estimate R0 from next-generation matrix
R0ij <- (N / L) * D * bij[1 : Lmax, 1 : Lmax]
Mij <- diag(c(My[1 : Lmax])) # g(a) (see notes)
NGM <- Mij %*% R0ij
NGM[is.na(NGM)] <- 0 # Replace missings with zeros
R0vec <- eigen(NGM, symmetric = FALSE, only.values = TRUE)$values
R0 <- max(as.double(R0vec)) # spectral radius
## Estimate R
seroTab <- table(floor(age), obsData) # Table with seroprevalence for each single year of age
suscp <- seroTab[, 1] / rowSums(seroTab)
# len included to reflect situations where NGM might be smaller than the size
# of seroTab
len <- min(dim(NGM)[1], length(suscp))
Rvec <- NGM[1 : len, 1 : len] %*% diag(suscp[1 : len])
R <- max(as.double(Rvec)) # spectral radius
ll[!is.finite(ll)] <- 0
return(list(lnLike = sum(ll), # log-likelihood
R0 = R0, # R0
R = R, # R
prev = prev, # estimated seroprevalence
foi = foiiprev)) # estimated force of infection
}
###################
## MCMC function ##
###################
## Initialise MCMC params
# no. of parameters to be estimated
nEstimate <- case_when(
propFac == "constant" ~ 1,
propFac == "loglin" ~ 2,
propFac == "extloglin" ~ 3
)
nIter <- 2e3 # number of iterations per chain
prior <- c(-2, -1, -0.1, 0, -0.5, 0) # set uniform prior for q params
#prior <- c(-5, 5)
dim(prior) <- c(2, nEstimate)
prior <- t(prior)
if(dim(prior)[1] / nEstimate != 1)
warning("Prior length does not match desired propFac")
init.scale.sd <- 1e-2 # set scaling factor for proposal distribution
covmat.proposal <- diag(nEstimate) * init.scale.sd * (prior[,2]-prior[,1]) # proposal distribution
adapt.size.start <- 100
adapt.size.cooling <- 0.99
adapt.shape.start <- 75
adapt.shape.stop <- NULL
max.scaling.sd <- 50
acc <- 0 # Initialise acceptance
acc.rate <- 1 #Initalise acceptance rate
mcmcOutput <- rep(NA, (nEstimate+1)*nIter) # Initialise output
dim(mcmcOutput) <- c(nIter,(nEstimate+1))
## Function for updating covariance matrix
updateCovmat <- function(covmat, param.mean, param0, i) {
residual <- as.vector(param0-param.mean)
covmat <- (covmat*(i-1)+(i-1)/i*residual%*%t(residual))/i
param.mean <- param.mean + residual/i
return(list(covmat=covmat,param.mean=param.mean))
}
######################
## Initialise chain ##
######################
#param0 <- -0.5
param0 <- c(-1.5, -0.05, -0.25) # initial values for q parameters
if(length(param0) / nEstimate != 1)
warning("Parameter input length does not match desired propFac")
lnLike0 <- FoI(param0, otherParams, seroData)$lnLike # Initialise log-likelihood
# Initialise covariance matrix
covmat.proposal.init <- covmat.proposal
adapting.size <- FALSE # will be set to TRUE once we start adapting the size
adapting.shape <- 0 # will be set to the iteration at which adaptation starts
# scaling factor for covmat size
scaling.sd <- 1
# scaling multiplier
scaling.multiplier <- 1
# empirical covariance matrix (0 everywhere initially)
covmat.empirical <- covmat.proposal
covmat.empirical[,] <- 0
# empirical mean vector
param.mean <- param0
ptm <- proc.time()[3] # set clock
for(i.iteration in 1:nIter){
# Adaptation of proposal distribution
if (!is.null(adapt.size.start) && i.iteration >= adapt.size.start &&
(is.null(adapt.shape.start) || acc.rate*i.iteration < adapt.shape.start)) {
if (!adapting.size) {
message("\n---> Start adapting size of covariance matrix")
adapting.size <- TRUE
}
# adapt size of covmat until we get enough accepted jumps
scaling.multiplier <- exp(adapt.size.cooling^(i.iteration-adapt.size.start) * (acc.rate - 0.234))
scaling.sd <- scaling.sd * scaling.multiplier
scaling.sd <- min(c(scaling.sd, max.scaling.sd))
# only scale if it doesn't reduce the covariance matrix to 0
covmat.proposal.new <- scaling.sd^2 * covmat.proposal.init
if (!(any(diag(covmat.proposal.new) <
.Machine$double.eps))) {
covmat.proposal <- covmat.proposal.new
}
} else if (!is.null(adapt.shape.start) &&
acc.rate*i.iteration >= adapt.shape.start &&
(adapting.shape == 0 || is.null(adapt.shape.stop) ||
i.iteration < adapting.shape + adapt.shape.stop)) {
if (!adapting.shape) {
message("\n---> Start adapting shape of covariance matrix")
# flush.console()
adapting.shape <- i.iteration
}
## adapt shape of covariance matrix using optimal scaling factor for multivariate target distributions
scaling.sd <- 2.38/sqrt(nEstimate)
covmat.proposal <- scaling.sd^2 * covmat.empirical
} else if (adapting.shape > 0) {
message("\n---> Stop adapting shape of covariance matrix")
adapting.shape <- -1
}
param1 <- drop(rmvnorm(1, mean=param0, sigma=covmat.proposal))
# Ensure that sampled parameters fall within prior. If not, then bounce off boundary
#for(j in 1:length(prior[,1])){
# param1[j] <- ifelse(param1[j] > prior[j,2], prior[j,2]-((param1[j]-prior[j,2])%%(prior[j,2]-prior[j,1])), ifelse(param1[j] < prior[j,1], prior[j,1]+((prior[j,1]-param1[j])%%(prior[j,2]-prior[j,1])), param1[j]))
# }
lnLike1 <- FoI(param1, otherParams, seroData)$lnLike
if(!is.na(lnLike1)){
logAlpha <- (lnLike1 + dmvnorm(param0, param1, covmat.proposal, log=T)) - (lnLike0 + dmvnorm(param1, param0, covmat.proposal, log=T))
alpha <- exp(logAlpha)
if(alpha > 1) alpha <- 1
}
else alpha <- 0
u <- runif(1,0,1)
if(u < alpha){
param0 <- param1
lnLike0 <- lnLike1
acc <- acc+1
}
# update empirical covariance matrix
if (adapting.shape >= 0) {
tmp <- updateCovmat(covmat.empirical, param.mean,
param0, i.iteration)
covmat.empirical <- tmp$covmat
param.mean <- tmp$param.mean
}
acc.rate <- acc/i.iteration
cat("\nAcceptance rate at jump", i.iteration, "is", round(acc.rate, digits=3) ,"\n")
mcmcOutput[i.iteration,1:nEstimate] <- param0
mcmcOutput[i.iteration,(nEstimate+1)] <- lnLike0
}
cat("\nAlgorithm required", (proc.time()[3]-ptm)/60, "minutes\nwith final acceptance rate", round(acc.rate, digits=3), "\n")
burnIn <- 500
mcmcOutput <- as.mcmc(mcmcOutput)
mcmcResults <- window(mcmcOutput, start=(1)) #
plot(mcmcResults)
ESS <- effectiveSize(mcmcResults)
AIC <- (2*nEstimate) + 2 * max(mcmcOutput[,ncol(mcmcOutput)]) # Akaike information criterion
mcmcOutput <- as_tibble(mcmcOutput)
sampleSize <- 100
if(propFac == "constant"){
postSample <- mcmcOutput %>% tail(-burnIn) %>%
sample_n(sampleSize, replace=TRUE) %>%
select(V1) %>%
rename(gamma0 = V1) %>%
mutate(q1 = exp(gamma0))
# Generate results for posterior samples
sampledResults <- as.vector(postSample$gamma0) %>%
map(FoI, otherParams, seroData)
}
if(propFac == "loglin"){
postSample <- mcmcOutput %>% tail(-burnIn) %>%
sample_n(sampleSize, replace=TRUE) %>%
select(V1, V2) %>%
rename(gamma0 = V1) %>%
rename(gamma1 = V2) %>%
mutate(q1 = exp(gamma0 + (gamma1 * mean(seq(0.5, Lmax-0.5)))))
# Generate results for posterior samples
sampledResults <- mapply(c, postSample$gamma0, postSample$gamma1, SIMPLIFY = FALSE) %>% # list of pairs (gamma0, gamma1)
map(FoI, otherParams, seroData)
}
if(propFac == "extloglin"){
postSample <- mcmcOutput %>% tail(-burnIn) %>%
sample_n(sampleSize, replace=TRUE) %>%
select(V1, V2, V3) %>%
rename(gamma0 = V1) %>%
rename(gamma1 = V2) %>%
rename(gamma2 = V3) %>%
mutate(q1 = exp(gamma0 + (gamma1 * mean(seq(0.5, Lmax-0.5))) + (gamma2 * mean(seq(0.5, Lmax-0.5)))))
# Generate results for posterior samples
sampledResults <- mapply(c, postSample$gamma0, postSample$gamma1, postSample$gamma2, SIMPLIFY = FALSE) %>% # list of pairs (gamma0, gamma1, gamma2)
map(FoI, otherParams, seroData)
}
# R and R0 values from sampled parameters
sampledR <- sampledResults %>% {
tibble(
R = map_dbl(., "R"),
R0 = map_dbl(., "R0")
)
}
sampledR <- bind_cols(postSample, sampledR)
#sampledR %>% filter(R >= 0.95 & R < 1.05) # filter for parameter sets close to endmeic eqm
# Re-format force of infection estimates from sampled parameters
sampledFoI <- map_dfc(sampledResults, extract, "foi")
sampledFoI <- as_tibble(sampledFoI) %>% # combine with age data and reduce to unique values (i.e. not list for whole serosurvey sample)
mutate(age = seq(0.5, Lmax-0.5)) %>% # check this. Should it indeed be mid-points?
gather(key=foiName, value=foi, -age) %>%
mutate(set = sapply(strsplit(foiName, split='i', fixed=TRUE),function(x) (x[2]))) %>% # derive set name
mutate(set = replace(set, is.na(set), 0)) %>% # rename the first set as 0
mutate(set = as.integer(set) + 1) %>% # to avoid 0/1 confusion
select(set, age, foi) # re-order for clarity
width1 <- 0.95
summaryFoI <- sampledFoI %>%
group_by(age) %>%
summarise(n = n(),
midFoi = median(foi),
lower = quantile(foi, probs = (1-width1)),
upper = quantile(foi, probs = width1)) %>%
mutate(width = width1) %>%
mutate(point = "median")
# Re-format prevalence estimates from sampled parameters
sampledPrev <- map_dfc(sampledResults, extract, "prev")
sampledPrev <- as_tibble(unique(cbind(seroData$AGE, sampledPrev))) %>% # combine with age data and reduce to unique values (i.e. not list for whole serosurvey sample)
gather(key=prevName, value=prev, -`seroData$AGE`) %>%
mutate(set = sapply(strsplit(prevName, split='v', fixed=TRUE),function(x) (x[2]))) %>% # derive set name
mutate(set = replace(set, is.na(set), 0)) %>% # rename the first set as 0
mutate(set = as.integer(set) + 1) %>% # to avoid 0/1 confusion
rename(age = 'seroData$AGE') %>%
select(set, age, prev) # re-order for clarity
summaryPrev <- sampledPrev %>%
group_by(age) %>%
summarise(n = n(),
midPrev = median(prev),
lower = quantile(prev, probs = (1-width1)),
upper = quantile(prev, probs = width1)) %>%
mutate(width = width1) %>%
mutate(point = "median")
# Plot force of infection and prevalence
htab <- table(
floor(
seroData[(seroData$AGE > 0.5) &
(seroData$AGE < 80) &
(!is.na(seroData$AGE)) &
!is.na(seroData$indic), ]$AGE[order(seroData[(seroData$AGE > 0.5) &
(seroData$AGE < 80) &
(!is.na(seroData$AGE)) &
!is.na(seroData$indic), ]$AGE)]),
seroData[(seroData$AGE > 0.5) &
(seroData$AGE < 80) &
(!is.na(seroData$AGE)) &
!is.na(seroData$indic), ]$indic[order(seroData[(seroData$AGE > 0.5) &
(seroData$AGE < 80) &
(!is.na(seroData$AGE)) &
!is.na(seroData$indic), ]$AGE)])
(p <- ggplot() +
geom_line(data = summaryFoI, mapping = aes(x = age, y = midFoi),
size = 0.01, alpha = 0.8) +
geom_ribbon(data = summaryFoI, mapping = aes(x = age, ymin = lower, ymax = upper),
fill = "blue", alpha = 0.2) +
geom_line(data = summaryPrev, mapping = aes(x = age, y = midPrev),
size = 0.01, alpha = 0.8) +
geom_ribbon(data = summaryPrev, mapping = aes(x = age, ymin = lower, ymax = upper),
fill = "dark green", alpha = 0.3) +
ylim(0, 1) +
geom_point(data = as.data.frame(cbind(age = as.numeric(row.names(htab)),
prop = htab[, 2] / rowSums(htab),
tot = rowSums(htab))),
mapping = aes(x = age, y = prop, size = tot),
pch = 1) +
labs(y = "", size = "number of samples"))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.