R/bootmex.R

#' Bootstrap a conditional multivariate extreme values model
#' 
#' Bootstrap a conditional multivariate extreme values model following the
#' method of Heffernan and Tawn, 2004.
#' 
#' Details of the bootstrap method are given by Heffernan and Tawn (2004). The
#' procedure is semi-parametric.
#' 
#' Firstly, values of all variables are simulated independently from the
#' parametric Gumbel or Laplace distributions (depending on the choice of
#' \code{margins} in the original call to \code{\link{mex}}). The sample size
#' and data dimension match that of the original data set.  Then an empirical
#' bootstrap sample is generated from the original data after its
#' transformation to the Gumbel/Laplace scale. Again, sample size and structure
#' match the original data set. The empirical bootstrap samples from each
#' margin are then sorted, and then replaced by their corresponding values from
#' the sorted Gumbel/Laplace samples. This procedure preserves the dependence
#' structure of the empirical bootstrap sample while ensuring the marginal
#' properties of the resulting semi-parametric bootstrap sample are those of
#' the parametric Gumbel/Laplace distribution.
#' 
#' The simulated, ordered Laplace/Gumbel sample is then transformed to the
#' scale of the original data by using the Probability Integral Transform.
#' Values beneath the original thresholds for fitting of the GPD tail models
#' are transformed by using the empirical distribution functions and for values
#' above these thresholds, the fitted GPDs are used.  This completes the
#' semi-parametric bootstrap from the data.
#' 
#' Parameter estimation is then carried out as follows: The parameters in the
#' generalized Pareto distributions are estimated by using the bootrap data,
#' these data are then transformed to the Laplace/Gumbel scale using the
#' orginal threshold, their empirical distribution function and these estimated
#' GPD parameters. The variables in the dependence structure of these variables
#' are then estimated.
#' 
#' Note that maximum likelihood estimation will often fail for small samples
#' when the generalized Pareto distribution is being fit. Therefore it will
#' often be useful to use penalized likelihood estimation. The function
#' \code{bootmex} does whatever was done in the call to \code{migpd} or
#' \code{mex} that generated the object with which it is being called.
#' 
#' Also note that sometimes (again, usually with small data sets) all of the
#' simulated Laplace/Gumbel random numbers will be beneath the threshold for
#' the conditioning variable. Such samples are abandoned by \code{bootmex} and
#' a new sample is generated. This probably introduces some bias into the
#' resulting bootstrap distributions.
#' 
#' The \code{plot} method produces histograms of bootstrap gpd parameters (the
#' default) or scatterplots of dependence parameters with the point estimates
#' for the original data shown.
#' 
#' By design, there is no \code{coef} method. The bootstrapping is done to
#' account for uncertainty. It is not obvious that adjusting the parameters for
#' the mean bias is the correct thing to do.
#' 
#' @aliases bootmex print.bootmex plot.bootmex
#' @usage bootmex(x, R = 100, nPass=3, trace=10,referenceMargin=NULL)
#' 
#' \method{plot}{bootmex}(x, plots = "gpd", main = "", ...)
#' \method{print}{bootmex}(x, ...)
#' @param x An object of class "mex" as returned by function \code{\link{mex}}.
#' @param R The number of bootstrap runs to perform. Defaults to \code{R}=100.
#' @param nPass An integer. Sometimes, particularly with small samples, the
#' estimation process fails with some bootstrap samples. The function checks
#' which runs fail and takes additional bootstrap samples in an attempt to get
#' parameter estimates. By default, it has nPass=3 attempts at this before
#' giving up.
#' @param trace How often to inform the user of progress. Defaults to
#' \code{trace=10}.
#' @param referenceMargin Optional set of reference marginal distributions to use for marginal transformation if the data's own marginal distribution is not appropriate (for instance if only data for which one variable is large is available, the marginal distributions of the other variables will not be represented by the available data).  This object can be created from a combination of datasets and fitted GPDs using the function \code{makeReferenceMarginalDistribution}.
#' @param plots What type of diagnostic plots to produce.  Defaults to "gpd" in
#' which case gpd parameter estimate plots are produced otherwise plots are
#' made for the dependence parameters.
#' @param main Title for plots.
#' @param ... Further arguments to be passed to methods.
#' @return An object of class 'bootmex'. Print and plot functions are
#' available.
#' @author Harry Southworth
#' @seealso \code{\link{migpd}}, \code{\link{mexDependence} },
#' \code{\link{bootmex}}, \code{\link{predict.mex}}.
#' @references J. E. Heffernan and J. A. Tawn, A conditional approach for
#' multivariate extreme values, Journal of the Royal Statistical society B, 66,
#' 497 -- 546, 2004
#' @keywords models multivariate
#' @examples
#' 
#' \donttest{
#' mymex <- mex(winter , mqu = .7, dqu = .7, which = "NO")
#' myboot <- bootmex(mymex)
#' myboot
#' plot(myboot,plots="gpd")
#' plot(myboot,plots="dependence")
#' } 
#' @export bootmex
bootmex <-
    # Bootstrap inference for a conditional multivaratiate extremes model.
function (x, R = 100, nPass = 3, trace = 10,referenceMargin=NULL) {
    theCall <- match.call()
    if (class(x) != "mex"){
      stop("object must be of type 'mex'")
    }

# Construct the object to be returned.
    ans <- list()
    ans$call <- theCall

    getTran <- function(i, x, data, mod, th, qu, margins) {
        param <- mod[[i]]$coefficients
        revTransform(margins$q2p(c(x[, i])), data = c(data[, i]), th = th[i],
                     qu = qu[i], sigma = exp(param[1]), xi = param[2])
    }

    mar <- x$margins
    dep <- x$dependence
    which <- dep$which
    constrain <- dep$constrain
    v <- dep$v
    dqu <- dep$dqu
    dth <- dep$dth
    margins <- dep$margins
    penalty <- mar$penalty
    priorParameters <- mar$priorParameters
    start <- 0.75* coef(x)$dependence[1:2,] # scale back towards zero in case point est on edge of original parameter space and falls off edge of constrained space for bootstrap sample

    n <- dim(mar$transformed)[[1]]
    d <- dim(mar$transformed)[[2]]
    dqu <- rep(dqu, d)
    dependent <- (1:d)[-which]

    ans$simpleDep <- dep$coefficients
    ans$dqu <- dqu
    ans$which <- which
    ans$R <- R
    ans$simpleMar <- mar
    ans$margins <- margins
    ans$constrain <- constrain

    innerFun <- function(i, x, which, dth, dqu, margins, penalty, priorParameters, constrain, v=v, start=start,
        pass = 1, trace = trace, n=n, d=d, getTran=getTran, dependent=dependent,referenceMargin=referenceMargin) {

        g <- sample(1:(dim(mar$transformed)[[1]]), size = n, replace = TRUE)
        g <- mar$transformed[g, ]
        ok <- FALSE

        while (!ok) {
          for (j in 1:(dim(g)[[2]])){
            u <- runif(nrow(g))
            g[order(g[, j]), j] <- sort(margins$p2q(u))
          }
          if (sum(g[, which] > dth) > 1  &   all(g[g[,which] > dth , which] > 0)){ ok <- TRUE }
        }

        g <- sapply(1:d, getTran, x = g, data = mar$data, margins=margins,
                    mod = mar$models, th = mar$mth, qu = mar$mqu)

        dimnames(g)[[2]] <- names(mar$models)

        ggpd <- migpd(g, mth = mar$mth,
                      penalty = penalty, priorParameters = priorParameters)

        gd <- mexDependence(ggpd, dqu = dqu, which = which, margins=margins[[1]], constrain=constrain, v=v, start=start,referenceMargin=referenceMargin)
        res <- list(GPD = coef(ggpd)[3:4, ],
                    dependence = gd$dependence$coefficients,
                    Z = gd$dependence$Z,
                    Y = g)

        if (pass == 1) {
            if (i%%trace == 0) {
                cat(paste(i, "replicates done\n"))
            }
        }
        res
    } # Close innerFun

    res <- lapply(1:R, innerFun, x = x, which = which, dth = dth, margins=margins,
        dqu = dqu, penalty = penalty, priorParameters = priorParameters, constrain=constrain, v=v, start=start,
        pass = 1, trace = trace, getTran=getTran, n=n, d=d, dependent=dependent,referenceMargin=referenceMargin)

    # Sometimes samples contain no extreme values. Need to have another pass or two
    if (nPass > 1) {
        for (pass in 2:nPass) {
            rerun <- sapply(res, function(x) any(sapply(x, function(x) any(is.na(x)))))
            wh <- !unlist(lapply(res, function(x) dim(x$Z)[[1]] > 0))
            rerun <- apply(cbind(rerun, wh), 1, any)
            if (sum(rerun) > 0) {
                message("Pass", pass, ":", sum(rerun), "samples to rerun.\n")
                rerun <- (1:R)[rerun]
                res[rerun] <- lapply((1:R)[rerun], innerFun,
                  x = x, which = which, dth = dth, dqu = dqu, margins=margins,
                  penalty = penalty, priorParameters = priorParameters, constrain=constrain, v=v, start=start,
                  pass = pass, trace = trace, getTran=getTran, n=n, d=d, dependent=dependent,referenceMargin=referenceMargin)
            }
        }
    }

    ans$boot <- res
    oldClass(ans) <- c("bootmex", "mex")
    ans
}

Try the texmex package in your browser

Any scripts or data that you put into this service are public.

texmex documentation built on May 2, 2019, 5:41 a.m.