R/helpers.R

Defines functions stack.list margaritaScale margarita.rp.matrix margarita.getProbs margarita.rp getCIquantiles getSegmentData

Documented in getCIquantiles getSegmentData margarita.getProbs margarita.rp margarita.rp.matrix margaritaScale stack.list

#' Get data for plotting segments representing interval estimates.
#' @param data A data frame with either 5 or 3 columns representing the point
#         estimates and interval estimates, and a column called 'group'.
getSegmentData <- function(data){
    if (ncol(data) > 5){
        seg1 <- data.frame(lo=data[, 1], hi=data[, 5], group=data$groups, stringsAsFactors=FALSE)
        seg2 <- data.frame(lo=data[, 2], hi=data[, 4], group=data$groups, stringsAsFactors=FALSE)
        if (any(seg1[, "lo"] > seg1[, "hi"]) | any(seg2[, "lo"] > seg2[, "hi"])){
            stop("CI segments with lo > hi")
        }
    }
    else if (ncol(data) > 3) {
        seg1 <- data.frame(lo=data[, 1], hi=data[, 3], group=data$groups, stringsAsFactors=FALSE)
        seg2 <- NULL
        if (any(seg1[, "lo"] > seg1[, "hi"])){
            stop("CI segments with lo > hi")
        }
    }
    else {
        stop("segment data has 3 or fewer columns")
    }
    list(seg1, seg2)
}

#' Get quantiles representing confidence limits from a given test size.
#' @param alpha The size of the test. Values {alpha/2, 1 - alpha/2} will be
#'        returned. \code{alpha} can have length either 1 or 2.
getCIquantiles <- function(alpha){
    # Get quantiles for CI plus the median, in order
    if (length(alpha) > 2){ stop("alpha can have length 1 or 2") }
    if (any(alpha >= 1) | any(alpha <= 0)){ stop("alpha should be on the interval (0, 1)") }
    alpha <- sort(alpha)
    c(alpha/2, 0.50, 1-rev(alpha)/2)
}

#' Return period for GPD
#'
#' Compute the return period for a generalized Pareto distribution
#' @param X An indexing parameter used in a call to \code{sapply} or \code{lapply}.
#' @param xm The threshold above which we wish to estimated the exceedance rate.
#' @param u The threshold.
#' @param par Matrix of parameter values.
#' @param p The rate of threshold exceedance.
#' @param r The data to which the original model was fit.
#' @param family The family object.
#' @param th The threshold used in fitting the model.
margarita.rp <- function(X, xm, u, par, p, r, family, th) {
  # r are the residuals above the modelling threshold
  xm <- xm[, X]
  ## this correctly handles values above the upper limit

  res <- p * (1 - family$prob(xm, par, list(threshold=u)))# pgpd(xm, exp(phi), xi, u, lower.tail=FALSE)
  wh <- u > xm

  if (any(wh)){
    r <- r[r < quantile(r, 1 - p)]
    res[wh] <- sapply(1:sum(wh),
                      function(i, x, r, m, p){
                        rr <- r + x[i]# - quantile(r, 1-p)
                        mean(rr > m[i])
                      },
                      x=u[wh], r=r, m=xm[wh], p=p)

    # Account for mixture distribution structure
    res[wh] <- p + (1 - p) * res[wh]
  }

  res
}

#' Get probabilities of threshold exceedance for a GPD model.
#'
#' Get probabilities of threshold exceedance for a genralized Pareto model.
#' @param X An index parameter used in a call to \code{sapply} or \code{lapply}.
#' @param par A numeric matrix with 2 columns containing the parameters for the GPD model.
#' @param u The threshold.
#' @param p The rate of threshold exceedance.
#' @param r The original data to which the model was fit.
#' @param m The threshold above which we wish to estimated the exceedance rate.
#' @param family The family object used in extreme value modelling.
#' @param th The threshold used to fit the model.
margarita.getProbs <- function(X, par, u, p, r, m, family, th) {
    u <- u[[X]]
    par <- par[[X]]
    lapply(1:ncol(m), margarita.rp, u=u, par=par, p=p, r=r, xm=m, family=family, th=th)
}

#' Construct a matrix of thresholds whose rate of exceedance we wish to estimate.
#'
#' @param M Numeric vector of thresholds.
#' @param scale Whether we are interested in the raw values of M or are treating
#'        them as fold-changes from baseline ('proportional') or absolute
#'        differences ('difference')
#' @param trans A function for transforming the data, the same as the function
#'        used to transform the data before fitting the original linear model.
#' @param d A list of data.frames
#' @param baseline The name of the baseline column in d
margarita.rp.matrix <- function(M, scale, trans, d, baseline){
    # Get baseline data - should be identical for each element of d, so use d[[1]]
    baseline <- d[[1]][, baseline]
    nr <- nrow(d[[1]])

    if (scale == "r"){ # raw
        m <- matrix(rep(trans(M), nr), ncol=length(M), byrow=TRUE)
    } else if (scale == "p") { # M is a multiple of baseline
        m <- matrix(rep(0, nr * length(M)), ncol=length(M))
        # d[[i]][, baseline] is the same for all i
        for (i in seq_along(M)) {
            m[, i] <- trans(M[i] * baseline)
        }
    } else if (scale == "d"){ # difference
        m <- matrix(rep(0, nr * length(M)), ncol=length(M))
        for (i in seq_along(M)){
            m[, i] <- trans(baseline) + M[i]
        }
    }
    m
}

#' Check an allowed value of the scale argument has been given and reduce it to
#' its first letter
#'
#' @param s A character string which should be 'raw', 'proportional' or 'difference'
#'         or an abbreviation of one of those. Only the first letter is returned.
margaritaScale <- function(s){
    if (length(s) > 1 | !is.character(s)){
        stop("scale should be a character vector of length 1")
    }
    s <- substring(casefold(s), 1, 1)
    if (!is.element(s, c("r", "p", "d"))){
        stop("scale can be raw, proportional or difference (only the first letter is checked)")
    }
    else{
        s
    }
}

#' Stack a list of data.frames or matrices
#'
#' Stack a list of data.frames or matrices that have the same number of columns
#' @importFrom utils stack
#' @method stack list
#' @export
#' @param x A list containing data.frames or matrices with the same number of columns
#' @param ... Additional arguments, currently unused.
#' @details A rudimentary check is performed to see if the objects in \code{x} have the
#'          same number of columns. If so, \code{rbind} is used to stack them.
stack.list <- function(x, ...){
  nc <- try(sapply(x, ncol), silent=TRUE)
  if (is.numeric(nc) & length(unique(nc)) == 1)
    do.call("rbind", x)
  else
    stop("x should be a list of data.frames or matrices with the same number of columns")
}

#' @export
"/.summary.margarita.sim.rl" <- function(x, value){
  x <- lapply(x, function(a) a/value)
  oldClass(x) <- "summary.margarita.sim.rl"
  x
}

#' @export
"*.summary.margarita.sim.rl" <- function(x, value){
  x <- lapply(x, function(a) a*value)
  oldClass(x) <- "summary.margarita.sim.rl"
  x
}

#' @export
"-.summary.margarita.sim.rl" <- function(x, value){
  x <- lapply(x, function(a) a-value)
  oldClass(x) <- "summary.margarita.sim.rl"
  x
}

#' @export
"+.summary.margarita.sim.rl" <- function(x, value){
  x <- lapply(x, function(a) a+value)
  oldClass(x) <- "summary.margarita.sim.rl"
  x
}
harrysouthworth/margarita documentation built on Aug. 19, 2021, 5 a.m.