R/dhhh4sims.R

################################################################################
### Derive Probability Mass Functions from hhh4 Simulations
###
### Copyright (C) 2018 Sebastian Meyer
###
### This file is part of the R package "HIDDA.forecasting",
### free software under the terms of the GNU General Public License, version 2,
### or (at your option) any later version, a copy of which is available at
### https://www.R-project.org/Licenses/.
################################################################################

## derive the mean values on which the samples are based (in simHHH4)
## NOTE: it would be more efficient to have these returned from simHHH4 as well
means_hhh4sims <- function (sims, model)
{
    stopifnot(inherits(sims, "hhh4sims"), inherits(model, "hhh4"))
    if (!is.array(sims))
        stop("only implemented for simulations with 'simplify = TRUE'")

    simSubset <- as.integer(rownames(sims))
    stsObj <- model$stsObj
    nsim <- dim(sims)[3L]

    getMeans1 <- function (i) {  # from the i'th simulation
        stsObj@observed[simSubset,] <- sims[,,i,drop=FALSE]
        terms <- surveillance:::interpretControl(model$control, stsObj)
        surveillance::meanHHH(model$coefficients, terms, simSubset, TRUE)
    }
    means <- vapply(X = seq_len(nsim), FUN = getMeans1,
                    FUN.VALUE = matrix(0, length(simSubset), model$nUnit),
                    USE.NAMES = FALSE)
    dimnames(means) <- dimnames(sims)

    ## at the first time point, means are the same for all simulations
    ## stopifnot(length(unique.default(means[1,1,])) == 1)

    return(means)
}


##' `hhh4`-Based Forecast Distributions
##'
##' The function `dhhh4sims` constructs a (non-vectorized)
##' probability mass function from the result of
##' [surveillance::simulate.hhh4()] (and the corresponding
##' model), as a function of the time point within the simulation period.
##' The distribution at each time point is obtained as a mixture of
##' negative binomial (or Poisson) distributions based on the samples from
##' the previous time point.
##'
##' @param sims a `"hhh4sims"` object from [surveillance::simulate.hhh4()].
##' @param model the ["hhh4"][surveillance::hhh4] object underlying `sims`.
##' @return a `function(x, tp = 1, log = FALSE)`, which takes a
##'     vector of `model$nUnit` counts and calculates the
##'     (`log`-)probability of observing these counts (given the
##'     `model`) at the `tp`'th time point of the simulation
##'     period (index or character string matching `rownames(sims)`).
##' @keywords distribution
##' @author Sebastian Meyer
##' @seealso [logs_hhh4sims()] where this function is used.
##' @examplesIf requireNamespace("surveillance")
##' library("surveillance")
##' CHILI.sts <- sts(observed = CHILI,
##'                  epoch = as.integer(index(CHILI)), epochAsDate = TRUE)
##'
##' ## fit a simple hhh4 model
##' (f1 <- addSeason2formula(~ 1, period = 365.2425))
##' fit <- hhh4(
##'     stsObj = CHILI.sts,
##'     control = list(ar = list(f = f1), end = list(f = f1), family = "NegBin1")
##' )
##'
##' ## simulate the last four weeks (only 200 runs, for speed)
##' sims <- simulate(fit, nsim = 200, seed = 1, subset = 884:nrow(CHILI.sts),
##'                  y.start = observed(CHILI.sts)[883,])
##' if (requireNamespace("fanplot")) {
##'     plot(sims, "fan", fan.args = list(ln = c(5,95)/100),
##'          observed.args = list(pch = 19), means.args = list(type = "b"))
##' }
##'
##' ## derive the weekly forecast distributions
##' dfun <- dhhh4sims(sims, fit)
##' dfun(4000, tp = 1)
##' dfun(4000, tp = 4)
##' curve(sapply(x, dfun, tp = 4), 0, 30000, type = "h",
##'       main = "4-weeks-ahead forecast",
##'       xlab = "No. infected", ylab = "Probability")
##'
##' ## compare the forecast distributions with the simulated counts
##' par(mfrow = n2mfrow(nrow(sims)))
##' for (tp in 1:nrow(sims)) {
##'     MASS::truehist(sims[tp,,], xlab = "counts", ylab = "Probability")
##'     curve(sapply(x, dfun, tp = tp), add = TRUE, lwd = 2)
##' }
##'
##' \dontshow{
##' ## verify distribution at the first time point (i.e., one-step-ahead NegBin)
##' stopifnot(identical(
##'     sapply(0:100, dfun, tp = 1),
##'     dnbinom(0:100,
##'             mu = meanHHH(fit$coefficients, terms(fit), subset = 884, total.only = TRUE),
##'             size = sizeHHH(fit$coefficients, terms(fit), subset = 884))
##' ))
##' ## check that we have a probability distribution at the last time point
##' .xgrid <- seq(0, 100000, by = 500)
##' stopifnot(abs(1 -
##'     integrate(approxfun(.xgrid, sapply(.xgrid, dfun, tp = 4)), 0, 100000)$value
##' ) < 0.001)
##' }
##' @export
dhhh4sims <- function (sims, model)
{
    stopifnot(inherits(sims, "hhh4sims"), inherits(model, "hhh4"))

    ## environment for the resulting functions
    env <- new.env(parent = getNamespace("stats"))

    ## number of units
    env$nUnit <- model$nUnit

    ## sequential means on which the samples were based (in simHHH4)
    env$means <- means_hhh4sims(sims, model)  # array with same dim() as sims

    ## check against meanHHH() output for the first time point of the simulation
    ## means_observed <- surveillance::meanHHH(model$coefficients, stats::terms(model),
    ##                                         subset = as.integer(rownames(sims)))
    ## stopifnot(all(means_observed$mean[1,] == env$means[1,,]))

    ## overdispersion is time-constant (of length 1 or nUnit)
    env$size <- c(surveillance::sizeHHH(model$coefficients, stats::terms(model), subset = 1))

    ## one-step-ahead distribution
    env$dOSA <- with(env, if (is.null(size)) {
        dpois
    } else {
        ## "formals<-"(dnbinom, value = modifyList(formals(dnbinom), list(size = size)))
        function (x, mu) dnbinom(x, mu = mu, size = size)
    })

    ## PMFs are mixtures of OSA distributions (with equal weight)
    dfun <- with(env, function(x, tp = 1, log = FALSE) {
        if (length(tp) != 1) stop("'tp' must have length 1")
        dsamples <- dOSA(x, means[tp,,])  # drop = TRUE
        ## if nUnit == 1, dsamples is a vector else a nUnit x nsim matrix
        prob <- if (length(x) == 1) {
            mean(dsamples)
        } else if (length(x) == nUnit) {
            .rowMeans(dsamples, nUnit, ncol(dsamples))
        } else stop("'x' must have length ", nUnit)
        if (log) log(prob) else prob
    })

    dfun
}


##' Simulation-Based Logarithmic Score Using `dhhh4sims`
##'
##' The function `logs_hhh4sims` computes the logarithmic score of the
##' forecast distributions based on a [surveillance::hhh4()] `model` and
##' [simulations][surveillance::simulate.hhh4] (`sims`) thereof. The
##' forecast distributions are obtained via [dhhh4sims()] as sequential
##' mixtures of negative binomial (or Poisson) distributions, which is
##' different from the kernel density estimation approach employed in
##' [scores_sample()].
##'
##' @param observed a vector or matrix of observed counts during the
##'     simulation period. By default (`NULL`), this is taken from
##'     `attr(sims, "stsObserved")`.
##' @param sims a `"hhh4sims"` object from
##'     [surveillance::simulate.hhh4()].
##' @param model the [surveillance::hhh4()] fit underlying `sims`.
##' @return a vector or matrix of log-scores for the `observed` counts.
##' @keywords univar
##' @author Sebastian Meyer
##' @seealso [scores_sample()] for an alternative approach of calculating
##'     the logarithmic score from simulation-based forecasts
##' @export
logs_hhh4sims <- function (observed = NULL, sims, model)
{
    dfun <- dhhh4sims(sims, model)  # checks the classes of the arguments
    observed <- if (is.null(observed)) {
        attr(sims, "stsObserved")@observed
    } else {
        stopifnot(NROW(observed) == nrow(sims), NCOL(observed) == model$nUnit)
        as.matrix(observed)
    }
    -vapply(X = seq_len(nrow(observed)),
            FUN = function (tp) dfun(observed[tp,], tp, log = TRUE),
            FUN.VALUE = numeric(ncol(observed)), USE.NAMES = FALSE)
}
HIDDA/forecasting documentation built on Jan. 11, 2024, 1:11 a.m.