#' Estimation of the reproduction number with Laplacian-P-splines
#'
#' @description
#' This routine estimates the instantaneous reproduction number \eqn{R_t}; the mean number
#' of secondary infections generated by an infected individual at time \eqn{t} (White et
#' al. 2020); by using Bayesian P-splines and Laplace approximations (Gressani et al. 2022).
#' Estimation of \eqn{R_t} is based on a time series of incidence counts and (a discretized) serial
#' interval distribution. The negative binomial distribution is used to model incidence
#' count data and P-splines (Eilers and Marx, 1996) are used to smooth the epidemic curve. The link
#' between the epidemic curve and the reproduction number is established via the renewal equation.
#'
#' @usage estimR(incidence, si, K = 30, dates = NULL, maxmethod = c("NelderMead","HillClimb"),
#' CoriR = FALSE, WTR = FALSE, optimstep = 0.3, priors = Rmodelpriors())
#'
#' @param incidence A vector containing the incidence time series. If \code{incidence}
#' contains NA values at certain time points, these are replaced by the average of
#' the left- and right neighbor counts. If the right neighbor is NA, the left neighbor
#' is used as a replacement value.
#' @param si The (discrete) serial interval distribution.
#' @param K Number of B-splines in the basis.
#' @param dates A vector of dates in format "YYYY-MM-DD" (optional).
#' @param maxmethod The method to maximize the hyperparameter posterior distribution.
#' @param CoriR Should the \eqn{R_t} estimate of Cori (2013) be also computed?
#' @param WTR Should the \eqn{R_t} estimate of Wallinga-Teunis (2004) be also computed?
#' @param optimstep Learning rate for the "HillClimb" method to maximize the
#' posterior distribution of the hyperparameters.
#' @param priors A list containing the prior specification of the model
#' hyperparameters as set in Rmodelpriors. See ?Rmodelpriors.
#'
#' @details The \code{estimR} routine estimates the reproduction number in a
#' totally "sampling-free" fashion. The hyperparameter vector (containing the
#' penalty parameter of the P-spline model and the overdispersion parameter of
#' the negative binomial model for the incidence time series) is fixed at its
#' maximum a posteriori (MAP). By default, the algorithm for maximization is
#' the one of Nelder and Mead (1965). If \code{maxmethod} is set to "HillClimb",
#' then a gradient ascent algorithm is used to maximize the hyperparameter posterior.
#'
#' @return A list with the following components:
#' \itemize{
#' \item incidence: The incidence time series.
#' \item si: The serial interval distribution.
#' \item RLPS: A data frame containing estimates of the reproduction number
#' obtained with the Laplacian-P-splines methodology.
#' \item thetahat: The estimated vector of B-spline coefficients.
#' \item Sighat: The estimated variance-covariance matrix of the Laplace
#' approximation to the conditional posterior distribution of
#' the B-spline coefficients.
#' \item RCori: A data frame containing the estimates of the reproduction
#' obtained with the method of Cori et al. (2013).
#' \item RWT: A data frame containing the estimates of the reproduction
#' obtained with the method of Wallinga-Teunis (2004).
#' \item LPS_elapsed: The routine real elapsed time (in seconds) when estimation
#' of the reproduction number is carried out with Laplacian-P-splines.
#' \item Cori_elapsed: The routine real elapsed time (in seconds) when estimation
#' of the reproduction number is carried out with the method of Cori et al. (2013).
#' \item penparam: The estimated penalty parameter related to the P-spline model.
#' \item K: The number of B-splines used in the basis.
#' \item NegBinoverdisp: The estimated overdispersion parameter of the negative
#' binomial distribution for the incidence time series.
#' \item optimconverged: Indicates whether the algorithm to maximize the
#' posterior distribution of the hyperparameters has converged.
#' \item method: The method to estimate the reproduction number with Laplacian-P-splines.
#' \item optim_method: The chosen method to to maximize the posterior distribution
#' of the hyperparameters.
#' }
#'
#' @author Oswaldo Gressani \email{oswaldo_gressani@hotmail.fr}
#'
#' @references Gressani, O., Wallinga, J., Althaus, C. L., Hens, N. and Faes, C.
#' (2022). EpiLPS: A fast and flexible Bayesian tool for estimation of the
#' time-varying reproduction number. \emph{Plos Computational Biology},
#' \strong{18}(10): e1010618.
#' @references Cori, A., Ferguson, N.M., Fraser, C., Cauchemez, S. (2013). A new
#' framework and software to estimate time-varying reproduction numbers during
#' epidemics. \emph{American Journal of Epidemiology}, \strong{178}(9):1505–1512.
#' @references Wallinga, J., & Teunis, P. (2004). Different epidemic curves for
#' severe acute respiratory syndrome reveal similar impacts of control measures.
#' \emph{American Journal of Epidemiology}, \strong{160}(6), 509-516.
#' @references White, L.F., Moser, C.B., Thompson, R.N., Pagano, M. (2021).
#' Statistical estimation of the reproductive number from case notification
#' data. \emph{American Journal of Epidemiology}, \strong{190}(4):611-620.
#' @references Eilers, P.H.C. and Marx, B.D. (1996). Flexible smoothing
#' with B-splines and penalties. \emph{Statistical Science},
#' \strong{11}(2):89-121.
#'
#' @examples
#' # Illustration on simulated data
#' si <- Idist(mean = 5, sd = 3)$pvec
#' datasim <- episim(si = si, endepi = 60, Rpattern = 5, dist="negbin", overdisp = 50)
#' epifit_sim <- estimR(incidence = datasim$y, si = si, CoriR = TRUE)
#' plot(epifit_sim, addfit = "Cori")
#'
#' # Illustration on the 2003 SARS epidemic in Hong Kong.
#' data(sars2003)
#' epifit_sars <- estimR(incidence = sars2003$incidence, si = sars2003$si, K = 40)
#' tail(epifit_sars$RLPS)
#' summary(epifit_sars)
#' plot(epifit_sars)
#'
#' @export
estimR <- function(incidence, si, K = 30, dates = NULL,
maxmethod = c("NelderMead", "HillClimb"),
CoriR = FALSE, WTR = FALSE, optimstep = 0.3,
priors = Rmodelpriors()){
tic <- proc.time() # Clock starts ticking
y <- KerIncidCheck(incidence) # Run checks on case incidence vector
n <- length(y) # Total number of days of the epidemic
simax <- length(si) # Length of serial interval distribution
B <- Rcpp_KercubicBspline(seq_len(n), lower = 1, upper = n, K = K) # C++ call
D <- diag(K)
penorder <- 2
for(k in 1:penorder){D <- diff(D)}
P <- t(D) %*% D # Penalty matrix of dimension c(K,K)
P <- P + diag(1e-06, K) # Perturbation to ensure P is full rank
# Prior specification
priorspec <- priors
a_delta <- priorspec$a_delta
b_delta <- priorspec$b_delta
phi <- priorspec$phi
a_rho <- priorspec$a_rho
b_rho <- priorspec$b_rho
# Minus log-posterior of hyperparameter vector
logphyper <- function(x) {
w <- x[1] # w = log(rho)
v <- x[2] # v = log(lambda)
# Laplace approximation
LL <- Rcpp_KerLaplace(theta0 = rep(1.5,K), exp(w), exp(v), K,
KerPtheta(Dobs = y, BB = B, Pen = P)$Dlogptheta,
KerPtheta(Dobs = y, BB = B, Pen = P)$D2logptheta)
thetastar <- as.numeric(LL$Lapmode)
logdetSigstar <- Re(LL$logdetSigma)
equal <- (-1) * (0.5 * logdetSigstar + 0.5 * (K + phi) * v + a_rho * w -
(0.5 * phi + a_delta) * log(0.5 * phi * exp(v) + b_delta) +
KerLikelihood(Dobs = y, BB = B)$loglik(thetastar, exp(w)) -
0.5 * exp(v) * sum((thetastar * P) %*% thetastar) -
b_rho * exp(w))
return(equal)
}
# Gradient of logphyper
Dlogphyper <- function(x, thetavec){
w <- x[1] # w = log(rho)
v <- x[2] # v = log(lambda)
# Laplace approximation
LL <- Rcpp_KerLaplace(theta0 = thetavec, exp(w), exp(v), K,
KerPtheta(Dobs = y, BB = B, Pen = P)$Dlogptheta,
KerPtheta(Dobs = y, BB = B, Pen = P)$D2logptheta)
thetastar <- as.numeric(LL$Lapmode)
Btheta <- as.numeric(B%*%thetastar)
Dloglik_w <- sum(exp(w) * (digamma(y + exp(w)) - digamma(exp(w)) +
(1 + w) - (log(exp(Btheta) + exp(w)) +
(1 / (1 + exp(Btheta - w))))) -
y * (1 / (1 + exp(Btheta - w))))
grad_w <- 0.5 * Re(LL$dSigw) + a_rho - b_rho * exp(w) + Dloglik_w
grad_v <- 0.5 * Re(LL$dSigv) + 0.5 * (K + phi) - ((0.5 * phi + a_delta) /
(0.5 * phi * exp(v) + b_delta)) *
(0.5 * phi * exp(v)) - 0.5 * exp(v) * sum((thetastar * P) %*% thetastar)
return(list(grad=c(grad_w, grad_v), thetastar = thetastar))
}
# Maximization of the posterior hyperparameters
optim_method <- match.arg(maxmethod)
if(optim_method == "NelderMead") {
optimhyper <- stats::optim(c(1, 5), fn = logphyper)
hypermap <- optimhyper$par
optimconverged <- (optimhyper$convergence == 0)
} else if (optim_method == "HillClimb") {
hypermap <- as.numeric(Rcpp_Kerhyperoptim(c(1, 5), BB = B,
grad = Dlogphyper, step = optimstep)$res)
optimconverged <- "Convergence for HillClimb not yet available"
}
disphat <- exp(hypermap[1])
lambhat <- exp(hypermap[2])
Lap_approx <- Rcpp_KerLaplace(theta0 = rep(1.5,K), disphat, lambhat, K,
KerPtheta(Dobs = y, BB = B, Pen = P)$Dlogptheta,
KerPtheta(Dobs = y, BB = B, Pen = P)$D2logptheta)
thetahat <- as.numeric(Lap_approx$Lapmode)
muhat <- as.numeric(exp(B %*% thetahat))
Sighat <- Lap_approx$Lapvar
# Check if a correction of CI on R should be done
cratio <- (1 + (1 / disphat) * mean(muhat)) / disphat
if (cratio <= 1) {
EMV <- (1 / (1 + muhat / disphat))
} else if (cratio > 1) {
EMV <- rep(1, n)
}
if(is.null(dates)) {
Time <- seq_len(n)
} else{
Time <- dates
}
# Computation of summary statistics for reproduction number with LPS
Routcpp <- Rcpp_KerRpostmap(BB = B, theta = thetahat, Covar = Sighat,
sinter = si, MVvec = EMV)
RLPS <- data.frame(cbind(Time, as.numeric(Routcpp$R), as.numeric(Routcpp$Rsd),
matrix(unlist(lapply(c(0.025,0.05,0.25,0.50,0.75,0.95,0.975),
FUN = stats::qlnorm, meanlog = Routcpp$meanlogNorm,
sdlog = Routcpp$sdlogNorm)),nrow = n, byrow = F)))
colnames(RLPS) <- c("Time", "R", "Rsd", "Rq0.025", "Rq0.05","Rq0.25",
"Rq0.50", "Rq0.75", "Rq0.95", "Rq0.975")
RLPS$Time <- Time
toc <- proc.time() - tic
toc <- round(toc[3], 3)
if (CoriR == TRUE) {# Use Cori method with weekly sliding windows
tic_Cori <- proc.time()
RCori <- KerCori(Dobs = incidence, sinter = si)
toc_Cori <- proc.time() - tic_Cori
} else{
RCori <- "Not called"
toc_Cori <- "Not called"
}
if(WTR == TRUE){# Use Wallinga-Teunis method to estimate R
RWT <- KerWT(Dobs = incidence, sinter = si)
} else{
RWT <- "Not called"
}
#-- Output results in a list
outputlist <- list(incidence = y, si = si, RLPS = RLPS,
thetahat = thetahat,
Sighat = Sighat,
RCori = RCori, RWT = RWT,
LPS_elapsed = toc,
Cori_elapsed = toc_Cori,
penparam = lambhat, K = K,
NegBinoverdisp = disphat,
optimconverged = optimconverged,
method = "LPSMAP",
optim_method = optim_method)
attr(outputlist, "class") <- "Rt"
outputlist
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.