#' Simulate from time-homogeneous underreported model
#'
#' Simulate from a time-homogeneous endemic-epidemic model with underreporting. Includes a burn-in period
#' to reach stationary phase.
#'
#' @param nu,phi,kappa,psi the model parameters (scalars)
#' @param q the reporting probability
#' @param lgt the length of the generated time series (integer)
#' @param start initial value of both $X$ and $lambda$ (at beginning of burn in period)
#' @param burn_in number of iterations to discard to reach stationary phase
#' @return A named list with elements \code{"X"}, \code{"X_tilde"} and \code{"lambda"} containing
#' the unthinned and thinned simulated time series as well as the conditional mean process.
#' @examples
#' sim <- simulate_hhh4u(nu = 3, phi = 0.4, kappa = 0.2, psi = 0.1, q = 0.5, lgt = 100)
#' @export
simulate_hhh4u_homog <- function(nu, phi, kappa, psi, q = 1, lgt = 100,
start = 10, burn_in = 1000){
lgt_total <- lgt + burn_in
lambda <- X <- X_tilde <- numeric(lgt_total)
lambda[1] <- round(nu/(1 - phi - kappa))
X[1] <- rnbinom(1, mu = lambda[1], size = 1/psi)
for(i in 2:lgt_total){
lambda[i] <- nu + phi*X[i - 1] + kappa*lambda[i - 1]
X[i] <- rnbinom(1, mu = lambda[i], size = 1/psi)
}
X <- tail(X, lgt)
lambda <- tail(lambda, lgt)
X_tilde <- rbinom(lgt, X, q)
list(X = X, lambda = lambda, X_tilde = X_tilde)
}
# function to compute second-order properties (of both the latent and the
# observed process). The ACF is given by rho(d) = gh^(d - 1)
compute_sop_homog <- function(nu, phi, kappa, psi, q, par_list = NULL){
# extract parameters if provided in list form
if(!is.null(par_list)){
nu <- par_list$nu; phi <- par_list$phi; kappa <- par_list$kappa
psi <- par_list$psi; q <- par_list$q
names(nu) <- names(phi) <- names(kappa) <- names(psi) <- names(q) <- NULL
}
# lists to store results:
X <- X_tilde <- list()
# properties of latent process X:
X$mu <- nu/(1 - phi - kappa)
X$sigma2 <- (1 - (phi + kappa)^2 + phi^2)/
(1 - (phi + kappa)^2 - psi*phi^2) *(X$mu + X$mu^2*psi)
X$g <- phi*(1 - kappa*(phi + kappa))/(1 - (phi + kappa)^2 + phi^2)
X$h <- phi + kappa
# properties of observeable process X_tilde:
X_tilde$mu <- q*X$mu
X_tilde$sigma2 <- q^2*X$sigma2 + q*(1 - q)*X$mu
X_tilde$g <- X$sigma2/(X$sigma2 + (1 - q)/q*X$mu)*X$g
X_tilde$h <- phi + kappa
return(list(X = X,
X_tilde = X_tilde))
}
# function to recover nu, phi, kappa, psi from the second-order
# properties for a given q:
recover_pars_homog <- function(mu, sigma2, g, h, q = 1, sop_list = NULL){
# extract second-order properties if provided as list:
if(!is.null(sop_list)){
mu <- sop_list$mu; sigma2 <- sop_list$sigma2; g <- sop_list$g; h <- sop_list$h
}
# recover parameters:
pars <- NULL
pars$nu <- mu*(1 - h)/q
c <- (sigma2 - (1 - q)*mu)/sigma2
pars$phi <- (sqrt(c^2*(1 - h^2)^2 + 4*(c*h - g)*g*(1 - h^2)) - c*(1 - h^2))/
(2*(c*h - g))
pars$kappa <- h - pars$phi
pars$psi <- ((sigma2 - (1 - q)*mu)*(1 - h^2) - q*mu*(1 - h^2 + pars$phi^2))/
(pars$phi^2*(sigma2 - (1 - q)*mu) + mu^2*(1 - h^2 + pars$phi^2))
pars$q <- q
# if(any(pars < 0)) print(pars)# stop("Parameter recovery not successful -- at least one parameter is negative.")
return(pars)
}
# re-parameterize (approximate) a model to a fully observed process
reparam_homog <- function(nu, phi, kappa, psi, q){
# get second-order properties
sop <- compute_sop_homog(nu = nu, phi = phi, kappa = kappa, psi = psi, q = q)$X_tilde
# recover parameters for q = 1
recover_pars_homog(sop_list = sop, q = 1)
}
#' Fitting time-homogeneous underreported endemic-epidemic model using maximum likelihood
#'
#' Fits a time-homogeneous endemic-epidemic model with underreporting using an approximative maximum
#' likelihood scheme. The likelihood approximation is based on an approximation of the process by
#' a second-order equivalent process with complete reporting. The reporting probability cannot be
#' estimated from the data (in most cases these contain no information on it) and thus needs to be
#' specified in advance.
#'
#' @param observed a time series of counts (numeric vector)
#' @param q the assumed reporting probability
#' @param initial the initial value of the parameter vector passed to optim
#' (note: the function re-runs optimization several times)
#' @param max_lag in evaluation of (conditional) likelihood only lags up to \code{max_lag} are taken into account
#' @return a named list containing the following elements:
#' \itemize{
#' \item parameter estimates on the original scale (code{par}) and associated
#' standard errors (\code{se}), obtained via the delta method
#' \item parameter estimates on the log scale (as used in the optimization; \code{par_log}),
#' associated standard errors (\code{se_log}) and inverse Fisher information matrix (\code{Sigma_log})
#' \item the return object of the call to \code{optim} (\code{opt})
#' \item the data (\code{observed}), reporting probability (\code{q}) and other settings \code{settings}
#' used in the call to \code{fit_hhh4u_homog}
#' }
#' @export
fit_hhh4u_homog <- function(observed, q, include_kappa = TRUE,
initial = c(log_nu = 2, log_phi = -1, log_kappa = -2, log_psi = -3),
max_lag = 5, iter_optim = 3, hessian = TRUE, ...){
# remove kappa from initial values if desired:
if(include_kappa == FALSE){
initial <- initial[names(initial) != "alpha_kappa"]
}
# wrapper around nllik_homog to be passed to optim:
nllik_vect <- function(pars){
# extract parameter values:
nu <- exp(pars["log_nu"])
phi <- exp(pars["log_phi"])
kappa <- ifelse(include_kappa, exp(pars["log_kappa"]), -10)
psi <- exp(pars["log_psi"])
nllik_homog(observed = observed, nu = nu, phi = phi, kappa = kappa, psi = psi, q = q, max_lag = max_lag)
}
# optimization:
opt <- optim(par = initial, fn = nllik_vect, hessian = hessian, ...)
# re-run optimization starting from previous optimum (improves convergence)
for(i in 1:iter_optim){
opt <- optim(par = opt$par, fn = nllik_vect, hessian = hessian, ...)
}
# prepare return object:
ret <- list()
ret$par <- exp(opt$par)
names(ret$par) <- c("nu", "phi", "kappa", "psi")
if(hessian){
Sigma <- solve(opt$hessian)
ret$Sigma_log <- Sigma
ret$se <- if(hessian) sqrt(diag(Sigma)*exp(opt$par)^2) else NULL
names(ret$se) <- c("nu", "phi", "kappa", "psi")
}else{
ret$se <- NULL
ret$Sigma <- NULL
}
ret$par_log <- opt$par
ret$se_log <- if(hessian) diag(Sigma) else NULL
ret$opt <- opt
ret$observed <- observed
ret$q <- q
ret$settings <- list(max_lag = max_lag, iter_optim = iter_optim,
initial = initial, hessian = hessian)
return(ret)
}
#' Evaluate the approximate log-likelihood of a time-homogeneous underreported endemic-epidemic model
#'
#' The likelihood approximation is based on an approximation of the process by
#' a second-order equivalent process with complete reporting.
#'
#' @param observed a time series of counts (numeric vector)
#' @param nu,phi,kappa,psi the model parameters (scalars)
#' @param q the assumed reporting probability
#' @param max_lag in evaluation of likelihood only lags up to max_lag are taken into account
#' @param return_contributions shall the log-likelihood contributions of each time point be
#' returned (as vector)?
#' @return The log-likelihood as scalar or (if \code{return_contributions == TRUE}) the vector of
#' log-likelihood contributions.
nllik_homog <- function(observed, nu, phi, kappa, psi, q, max_lag = 5, return_contributions = FALSE){
# get second order properties:
sop <- compute_sop_homog(nu = nu, phi = phi, kappa = kappa, psi = psi, q = q)$X_tilde
# return -Inf if non-statinary:
if(any(unlist(sop) < 0)) return(-Inf)
# get corresponding parameters for unthinned process:
pars_Y <- recover_pars_homog(q = 1, sop_list = sop)
nu_Y <- pars_Y$nu
phi_Y <- pars_Y$phi
kappa_Y <- pars_Y$kappa
psi_Y <- pars_Y$psi
nu_Y_star <- nu_Y/(1 - kappa_Y) # move to geometric-lag display
q <- 1
# get fitted values:
lgt <- length(observed)
mod_matr <- matrix(nrow = length(observed), ncol = max_lag)
for(i in 1: max_lag){
mod_matr[, i] <- c(rep(NA, i), head(observed, lgt - i))
}
lin_pred <- nu_Y_star + mod_matr %*% matrix(phi_Y*kappa_Y^(1:max_lag - 1), ncol = 1)
# evaluate likelihood:
llik <- - dnbinom(observed, mu = as.vector(lin_pred), size = 1/psi_Y, log = TRUE)
if(return_contributions) return(llik) else return(sum(llik[-(1:max_lag)]))
}
# helper function to obtain second-order properties of simulated time series
emp_sop_homog <- function(observed){
lgt <- length(observed)
list(
mean = mean(observed),
var = var(observed),
ar1 = cor(head(observed, lgt - 1), tail(observed, lgt - 1)),
ar2 = cor(head(observed, lgt - 2), tail(observed, lgt - 2))
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.