#' Estimation of the reproduction number with Laplacian-P-splines via MCMC
#'
#' @description
#' This routine estimates the instantaneous reproduction number \eqn{R_t};
#' the mean number of secondary infections generated by an infected individual
#' at time \eqn{t} (White et al. 2020); by using Bayesian P-splines and Laplace
#' approximations (Gressani et al. 2022). The inference approach is fully
#' stochastic with a Metropolis-adjusted Langevin algorithm. The
#' \code{estimRmcmc()} routine estimates \eqn{R_t} based on a time series of
#' incidence counts and a (discretized) serial interval distribution. The
#' negative binomial distribution is used to model incidence count data and
#' P-splines (Eilers and Marx, 1996) are used to smooth the epidemic curve.
#' The link between the epidemic curve and the reproduction number is
#' established via the renewal equation.
#'
#' @usage estimRmcmc(incidence, si, K = 30, dates = NULL, niter = 5000, burnin = 2000,
#' CoriR = FALSE, WTR = FALSE, priors = Rmodelpriors(), progressbar = TRUE)
#'
#' @param incidence A vector containing the incidence time series. If
#' \code{incidence} contains NA values at certain time points, these are
#' replaced by the average of the left- and right neighbor counts. If the
#' right neighbor is NA, the left neighbor is used as a replacement value.
#' @param si The (discrete) serial interval distribution.
#' @param K Number of B-splines in the basis.
#' @param dates A vector of dates in format "YYYY-MM-DD" (optional).
#' @param niter The number of MCMC samples.
#' @param burnin The burn-in size.
#' @param CoriR Should the \eqn{R_t} estimate of Cori (2013) be also computed?
#' @param WTR Should the \eqn{R_t} estimate of Wallinga-Teunis (2004) be
#' also computed?
#' @param priors A list containing the prior specification of the model
#' hyperparameters as set in Rmodelpriors. See ?Rmodelpriors.
#' @param progressbar Should a progression bar indicating status of MCMC
#' algorithm be shown? Default is TRUE.
#'
#' @return A list with the following components:
#' \itemize{
#' \item incidence: The incidence time series.
#' \item si: The serial interval distribution.
#' \item RLPS: A data frame containing estimates of the reproduction number
#' obtained with the Laplacian-P-splines methodology.
#' \item thetahat: The estimated vector of B-spline coefficients.
#' \item Sighat: The estimated variance-covariance matrix of the Laplace
#' approximation to the conditional posterior distribution of
#' the B-spline coefficients.
#' \item RCori: A data frame containing the estimates of the reproduction
#' obtained with the method of Cori (2013).
#' \item RWT: A data frame containing the estimates of the reproduction
#' obtained with the method of Wallinga-Teunis (2004).
#' \item LPS_elapsed: The routine real elapsed time (in seconds) when estimation
#' of the reproduction number is carried out with Laplacian-P-splines.
#' \item penparam: The estimated penalty parameter related to the P-spline model.
#' \item K: The number of B-splines used in the basis.
#' \item NegBinoverdisp: The estimated overdispersion parameter of the negative
#' binomial distribution for the incidence time series.
#' \item optimconverged: Indicates whether the algorithm to maximize the
#' posterior distribution of the hyperparameters has converged.
#' \item method: The method to estimate the reproduction number with Laplacian-P-splines.
#' \item optim_method: The chosen method to to maximize the posterior distribution
#' of the hyperparameters.
#' \item HPD90_Rt: The \eqn{90\%} HPD interval for Rt obtained with the LPS methodology.
#' \item HPD95_Rt: The \eqn{95\%} HPD interval for Rt obtained with the LPS methodology.
#' }
#'
#' @author Oswaldo Gressani \email{oswaldo_gressani@hotmail.fr}
#'
#' @references Gressani, O., Wallinga, J., Althaus, C. L., Hens, N. and Faes, C.
#' (2022). EpiLPS: A fast and flexible Bayesian tool for estimation of the
#' time-varying reproduction number. \emph{Plos Computational Biology},
#' \strong{18(10): e1010618}.
#' @references Cori, A., Ferguson, N.M., Fraser, C., Cauchemez, S. (2013). A new
#' framework and software to estimate time-varying reproduction numbers during
#' epidemics. \emph{American Journal of Epidemiology}, \strong{178}(9):1505–1512.
#' @references Wallinga, J., & Teunis, P. (2004). Different epidemic curves for
#' severe acute respiratory syndrome reveal similar impacts of control measures.
#' \emph{American Journal of Epidemiology}, \strong{160}(6), 509-516.
#' @references White, L.F., Moser, C.B., Thompson, R.N., Pagano, M. (2021).
#' Statistical estimation of the reproductive number from case notification
#' data. \emph{American Journal of Epidemiology}, \strong{190}(4):611-620.
#' @references Eilers, P.H.C. and Marx, B.D. (1996). Flexible smoothing
#' with B-splines and penalties. \emph{Statistical Science},
#' \strong{11}(2):89-121.
#'
#' @examples
#' # Illustration on the 2009 influenza pandemic in Pennsylvania.
#' data(influenza2009)
#' epifit_flu <- estimRmcmc(incidence = influenza2009$incidence, dates = influenza2009$dates,
#' si = influenza2009$si[-1], niter = 2500,
#' burnin = 1500, progressbar = FALSE)
#' tail(epifit_flu$RLPS)
#' summary(epifit_flu)
#' plot(epifit_flu)
#'
#' @export
estimRmcmc <- function(incidence, si, K = 30, dates = NULL, niter = 5000,
burnin = 2000, CoriR = FALSE, WTR = FALSE,
priors = Rmodelpriors(), progressbar = TRUE){
tic <- proc.time() # Clock starts ticking
y <- KerIncidCheck(incidence) # Run checks on case incidence vector
n <- length(y) # Total number of days of the epidemic
simax <- length(si) # Length of serial interval distribution
B <- Rcpp_KercubicBspline(seq_len(n), lower = 1, upper = n, K = K) # C++ call
D <- diag(K)
penorder <- 2
for(k in 1:penorder){D <- diff(D)}
P <- t(D) %*% D # Penalty matrix of dimension c(K,K)
P <- P + diag(1e-06, K) # Perturbation to ensure P is full rank
# Prior specification
priorspec <- priors
a_delta <- priorspec$a_delta
b_delta <- priorspec$b_delta
phi <- priorspec$phi
a_rho <- priorspec$a_rho
b_rho <- priorspec$b_rho
# Minus log-posterior of hyperparameter vector
logphyper <- function(x) {
w <- x[1] # w = log(rho)
v <- x[2] # v = log(lambda)
# Laplace approximation
LL <- Rcpp_KerLaplace(theta0 = rep(1.5,K), exp(w), exp(v), K,
KerPtheta(Dobs = y, BB = B, Pen = P)$Dlogptheta,
KerPtheta(Dobs = y, BB = B, Pen = P)$D2logptheta)
thetastar <- as.numeric(LL$Lapmode)
logdetSigstar <- Re(LL$logdetSigma)
equal <- (-1) * (0.5 * logdetSigstar + 0.5 * (K + phi) * v + a_rho * w -
(0.5 * phi + a_delta) * log(0.5 * phi * exp(v) + b_delta) +
KerLikelihood(Dobs = y, BB = B)$loglik(thetastar, exp(w)) -
0.5 * exp(v) * sum((thetastar * P) %*% thetastar) -
b_rho * exp(w))
return(equal)
}
# Maximization of the posterior hyperparameters
optim_method <- "NelderMead"
optimhyper <- stats::optim(c(1, 5), fn = logphyper)
hypermap <- optimhyper$par
optimconverged <- (optimhyper$convergence == 0)
disphat <- exp(hypermap[1])
lambhat <- exp(hypermap[2])
Lap_approx <- Rcpp_KerLaplace(theta0 = rep(1.5,K), disphat, lambhat, K,
KerPtheta(Dobs = y, BB = B, Pen = P)$Dlogptheta,
KerPtheta(Dobs = y, BB = B, Pen = P)$D2logptheta)
thetahat <- as.numeric(Lap_approx$Lapmode)
muhat <- as.numeric(exp(B %*% thetahat))
Sighat <- Lap_approx$Lapvar
# Call Metropolis-adjusted Langevin algorithm
if(isTRUE(progressbar)){
cat(paste0("Metropolis-adjusted Langevin algorithm running for ",niter,
" iterations \n"))
progbar <- utils::txtProgressBar(min = 1, max = niter, initial = 1,
style = 3, char =">")
} else{
progbar <- NULL
}
MCMCout <- KerMCMC(Dobs = incidence, BB = B, Pen = P, Covar = Sighat,
thetaoptim = thetahat, penoptim = lambhat,
overdispoptim = disphat, progress = progbar,
priors = priorspec)$MALA(M=niter)
# Chain extraction
lambdaMALA <- coda::as.mcmc(MCMCout$lambda_mcmc[(burnin+1):niter])
deltaMALA <- coda::as.mcmc(MCMCout$delta_mcmc[(burnin+1):niter])
rhoMALA <- coda::as.mcmc(MCMCout$rho_mcmc[(burnin+1):niter])
thetaMALA <- coda::as.mcmc(MCMCout$theta_mcmc[(burnin+1):niter,])
accept_mcmc <- MCMCout$accept_rate
# Point estimation
thetahat_mcmc <- colMeans(thetaMALA)
lambdahat_mcmc <- mean(lambdaMALA)
rhohat_mcmc <- mean(rhoMALA)
muMALA_mcmc <- matrix(0, nrow = (niter - burnin), ncol = n)
for(j in 1:(niter-burnin)){
muMALA_mcmc[j,] <- as.numeric(exp(B %*% thetaMALA[j, ]))
}
mu_estim <- colMeans(muMALA_mcmc)
if(is.null(dates)) {
Time <- seq_len(n)
} else{
Time <- dates
}
RLPS <- data.frame(matrix(0, nrow = n, ncol = 10))
colnames(RLPS) <- c("Time", "R", "Rsd", "Rq0.025", "Rq0.05","Rq0.25",
"Rq0.50","Rq0.75", "Rq0.95", "Rq0.975")
RLPS$Time <- Time
HPD90_Rt <- data.frame(matrix(0, nrow = n, ncol = 2))
HPD95_Rt <- data.frame(matrix(0, nrow = n, ncol = 2))
colnames(HPD90_Rt) <- c("HPD90_low","HPD90_up")
colnames(HPD95_Rt) <- c("HPD95_low","HPD95_up")
rownames(HPD90_Rt) <- Time
rownames(HPD95_Rt) <- Time
for(j in 1:n){
CppCall <- as.numeric(Rcpp_KerRpostmcmc(t = j, BB = B, sinter = si,
thetasample = thetaMALA))
RLPS$R[j] <- mean(CppCall)
RLPS$Rsd[j] <- stats::sd(CppCall)
RLPS[j, (4:10)] <- stats::quantile(CppCall,
probs = c(0.025, 0.05, 0.25, 0.50, 0.75,
0.95, 0.975))
HPD90_Rt[j, ] <- coda::HPDinterval(coda::as.mcmc(CppCall), prob = 0.90)
HPD95_Rt[j, ] <- coda::HPDinterval(coda::as.mcmc(CppCall), prob = 0.95)
}
if (CoriR == TRUE) {# Use Cori method with weekly sliding windows
RCori <- KerCori(Dobs = incidence, sinter = si)
} else{
RCori <- "Not called"
}
if(WTR == TRUE){# Use Wallinga-Teunis method to estimate R
RWT <- KerWT(Dobs = incidence, sinter = si)
} else{
RWT <- "Not called"
}
toc <- proc.time() - tic
toc <- round(toc[3], 3)
#-- Output results in a list
outputlist <- list(incidence = y, si = si, RLPS = RLPS,
thetahat = thetahat,
Sighat = Sighat,
RCori = RCori, RWT = RWT,
LPS_elapsed = toc, penparam = lambdahat_mcmc, K = K,
NegBinoverdisp = rhohat_mcmc,
optimconverged = optimconverged,
method = "LPSMALA",
optim_method = optim_method,
HPD90_Rt = HPD90_Rt,
HPD95_Rt = HPD95_Rt)
attr(outputlist, "class") <- "Rt"
outputlist
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.