Nothing
#' Threshold stability plots
#'
#' The generalized Pareto and exponential distribution
#' are threshold stable. This property, which is used
#' for extrapolation purposes, can also be used to diagnose
#' goodness-of-fit: we expect the parameters \eqn{\xi} and \eqn{\tilde{\sigma} = \sigma + \xi u}
#' to be constant over a range of thresholds. The threshold stability
#' plot consists in plotting maximum likelihood estimates with pointwise confidence interval.
#' This function handles interval truncation and right-censoring.
#'
#' @details The shape estimates are constrained
#' @inheritParams nll_elife
#' @param family string; distribution, either generalized Pareto (\code{gp}) or exponential (\code{exp})
#' @param method string; the type of pointwise confidence interval, either Wald (\code{wald}) or profile likelihood (\code{profile})
#' @param level probability level for the pointwise confidence intervals
#' @param plot.type string; either \code{base} for base R plots or \code{ggplot} for \code{ggplot2} plots
#' @param which.plot string; which parameters to plot;
#' @param plot logical; should a plot be returned alongside with the estimates? Default to \code{TRUE}
#' @seealso \code{tstab.gpd} from package \code{mev}, \code{gpd.fitrange} from package \code{ismev} or \code{tcplot} from package \code{evd}, among others.
#' @importFrom utils packageVersion
#' @export
#' @return an invisible list with pointwise estimates and confidence intervals for the scale and shape parameters
#' @examples
#' set.seed(1234)
#' n <- 100L
#' x <- samp_elife(n = n,
#' scale = 2,
#' shape = -0.2,
#' lower = low <- runif(n),
#' upper = upp <- runif(n, min = 3, max = 20),
#' type2 = "ltrt",
#' family = "gp")
#' tstab_plot <- tstab(time = x,
#' ltrunc = low,
#' rtrunc = upp,
#' thresh = quantile(x, seq(0, 0.5, length.out = 4)))
#' plot(tstab_plot, plot.type = "ggplot")
tstab <- function(
time,
time2 = NULL,
event = NULL,
thresh = 0,
ltrunc = NULL,
rtrunc = NULL,
type = c("right", "left", "interval", "interval2"),
family = c('gp', 'exp'),
method = c("wald", "profile"),
level = 0.95,
plot = TRUE,
plot.type = c("base", "ggplot"),
which.plot = c("scale", "shape"),
weights = NULL,
arguments = NULL,
...
) {
if (!is.null(arguments)) {
call <- match.call(expand.dots = FALSE)
arguments <- check_arguments(
func = tstab,
call = call,
arguments = arguments
)
return(do.call(tstab, args = arguments))
}
family <- match.arg(family)
method <- match.arg(method)
type <- match.arg(type)
if (plot) {
plot.type <- match.arg(plot.type)
which.plot <- match.arg(
which.plot,
choices = c("scale", "shape"),
several.ok = TRUE
)
}
wald_confint <- function(par, std.error, level = 0.95) {
c(par[1], par[1] + qnorm(0.5 + level / 2) * std.error[1] * c(-1, 1))
}
stopifnot("Provide multiple thresholds" = length(thresh) > 1)
scale_par_mat <- matrix(NA, nrow = length(thresh), ncol = 3)
if (family == "gp") {
shape_par_mat <- matrix(NA, nrow = length(thresh), ncol = 3)
}
for (i in seq_along(thresh)) {
opt_mle <- fit_elife(
time = time,
time2 = time2,
event = event,
thresh = thresh[i],
ltrunc = ltrunc,
rtrunc = rtrunc,
type = type,
family = family,
export = TRUE,
weights = weights
)
opt_mle$mdat <- max(c(opt_mle$time, opt_mle$time2), na.rm = TRUE)
if (method == "profile") {
if (family == "gp") {
if ("shape" %in% which.plot) {
shape_i <- try(prof_gp_shape(
mle = opt_mle,
time = time,
time2 = time2,
event = event,
thresh = thresh[i],
ltrunc = ltrunc,
rtrunc = rtrunc,
type = type,
level = level,
weights = weights
))
if (!inherits(shape_i, "try.error")) {
shape_par_mat[i, ] <- shape_i
}
}
if ("scale" %in% which.plot) {
scalet_i <- try(prof_gp_scalet(
mle = opt_mle,
time = time,
time2 = time2,
event = event,
thresh = thresh[i],
ltrunc = ltrunc,
rtrunc = rtrunc,
type = type,
level = level,
weights = weights
))
if (!inherits(scalet_i, "try-error")) {
scale_par_mat[i, ] <- scalet_i
}
}
} else if (family == "exp") {
scale_i <- try(prof_exp_scale(
mle = opt_mle,
time = time,
time2 = time2,
event = event,
thresh = thresh[i],
ltrunc = ltrunc,
rtrunc = rtrunc,
type = type,
level = level
))
if (!inherits(scale_i, "try-error")) {
scale_par_mat[i, ] <- scale_i
}
}
} else if (method == "wald") {
if (family == "gp") {
sigma_t_mle <- opt_mle$par[1] - opt_mle$par[2] * thresh[i]
dV <- matrix(c(1, -thresh[i]), ncol = 1)
if (!is.null(opt_mle$vcov)) {
v <- t(dV) %*% opt_mle$vcov %*% dV
shape_par_mat[i, ] <- wald_confint(
par = opt_mle$par[2],
opt_mle$std.error[2],
level = level
)
scale_par_mat[i, ] <- wald_confint(
par = sigma_t_mle,
std.error = sqrt(v),
level = level
)
}
} else if (family == "exp") {
if (!is.null(opt_mle$vcov)) {
scale_par_mat[i, ] <- wald_confint(
par = opt_mle$par,
opt_mle$std.error
)
}
}
}
}
colnames(scale_par_mat) <- c("estimate", "lower", "upper")
if (family == "gp") {
colnames(shape_par_mat) <- c("estimate", "lower", "upper")
if (!"scale" %in% which.plot) {
scale_par_mat <- NULL
}
if (!"shape" %in% which.plot) {
shape_par_mat <- NULL
}
res <- structure(
list(
family = family,
scale = scale_par_mat,
shape = shape_par_mat,
thresh = thresh
),
class = "elife_tstab"
)
} else if (family == "exp") {
res <- structure(
list(family = family, scale = scale_par_mat, thresh = thresh),
class = "elife_tstab"
)
}
if (plot) {
plot(res, plot.type = plot.type, which.plot = which.plot, plot = TRUE)
}
#TODO check this for left-truncated data
res$nexc <- vapply(
thresh,
function(u) {
sum(time > u, na.rm = TRUE)
},
integer(1)
)
invisible(res)
}
#' Threshold stability plots
#' @param x an object of class \code{elife_tstab} containing
#' the fitted parameters as a function of threshold
#' @inheritParams plot.elife_par
#' @export
plot.elife_tstab <- function(
x,
plot.type = c("base", "ggplot"),
which.plot = c("scale", "shape"),
plot = TRUE,
...
) {
object <- x
plot.type <- match.arg(plot.type)
if (plot.type == "ggplot") {
if (requireNamespace("ggplot2", quietly = TRUE)) {
} else {
warning("`ggplot2` package is not installed. Switching to base R plots.")
plot.type <- "base"
}
}
which.plot <- match.arg(
which.plot,
choices = c("scale", "shape"),
several.ok = TRUE
)
if (plot.type == "ggplot") {
ggplot_thstab <- function(x, thresh, ylab) {
stopifnot(all.equal(colnames(x), c("estimate", "lower", "upper")))
g <- ggplot2::ggplot(
data = as.data.frame(cbind(thresh = thresh, x)),
ggplot2::aes(x = .data[["thresh"]], y = .data[["estimate"]])
) +
ggplot2::geom_pointrange(
ggplot2::aes(ymin = .data[["lower"]], ymax = .data[["upper"]]),
size = 0.5,
shape = 20
) +
ggplot2::labs(
x = "threshold",
y = ylab,
main = "threshold stability plot"
) +
ggplot2::theme_classic() #+
# scale_x_continuous(breaks = thresh, minor_breaks = NULL)
return(invisible(g))
}
graphs <- list()
if (object$family == "gp") {
if ("scale" %in% which.plot) {
g1 <- ggplot_thstab(
x = object$scale,
thresh = object$thresh,
ylab = "modified scale"
)
if (length(which.plot) == 1L && plot) {
print(g1)
}
graphs$g1 <- g1
}
if ("shape" %in% which.plot) {
g2 <- ggplot_thstab(
x = object$shape,
thresh = object$thresh,
ylab = "shape"
)
if (length(which.plot) == 1L && plot) {
print(g2)
}
graphs$g2 <- g2
}
if (length(which.plot) == 2L && plot) {
lapply(list(g1, g2), print)
}
} else if (object$family == "exp") {
g1 <- ggplot_thstab(
x = object$scale,
thresh = object$thresh,
ylab = "scale"
)
if (plot) {
print(g1)
}
graphs$g1 <- g1
}
return(invisible(graphs))
} else {
# Default to base plot
base_tstab_plot <- function(x, thresh, ylab = "scale") {
rangex <- range(x)
args <- list(
type = "p",
pch = 19,
bty = "l",
ylab = ylab,
xlab = "threshold",
ylim = rangex
)
ellipsis <- list(...)
args[names(ellipsis)] <- ellipsis
args$x <- thresh
args$y <- x[, 1]
do.call("plot.default", args = args)
for (i in seq_along(thresh)) {
arrows(
x0 = thresh[i],
y0 = x[i, 2],
y1 = x[i, 3],
length = 0.02,
angle = 90,
code = 3,
lwd = 2
)
}
}
if (object$family == "exp") {
base_tstab_plot(x = object$scale, thresh = object$thresh)
} else {
#generalized Pareto
if ("scale" %in% which.plot) {
base_tstab_plot(
x = object$scale,
thresh = object$thresh,
ylab = "modified scale"
)
}
if ("shape" %in% which.plot) {
base_tstab_plot(
x = object$shape,
thresh = object$thresh,
ylab = "shape"
)
}
}
}
}
#' Profile log likelihood for the shape parameter of the generalized Pareto distribution
#'
#' This internal function is used to produce threshold stability plots.
#'
#' @inheritParams nll_elife
#' @param mle an object of class \code{elife_par}
#' @param confint logical; if \code{TRUE} (default), return confidence intervals rather than list
#' @param level level of the confidence interval
#' @keywords internal
#' @export
#' @return if \code{confint=TRUE}, a vector of length three with the maximum likelihood of the shape and profile-based confidence interval
prof_gp_shape <-
function(
mle = NULL,
time,
time2 = NULL,
event = NULL,
thresh,
ltrunc = NULL,
rtrunc = NULL,
type = c("right", "left", "interval", "interval2"),
level = 0.95,
psi = NULL,
weights = NULL,
confint = TRUE,
arguments = NULL,
...
) {
if (!is.null(arguments)) {
call <- match.call(expand.dots = FALSE)
arguments <- check_arguments(
func = prof_gp_shape,
call = call,
arguments = arguments
)
return(do.call(prof_gp_shape, args = arguments))
}
if (is.null(weights)) {
weights <- rep(1, length(time))
}
type <- match.arg(type)
stopifnot(
"Provide a single value for the level" = length(level) == 1L,
"Level should be a probability" = level < 1 && level > 0,
"Provide a single threshold" = length(thresh) == 1L
)
if (!is.null(mle)) {
stopifnot(
"`mle` should be an object of class `elife_par` as returned by `fit_elife`" = inherits(
mle,
"elife_par"
)
)
} else {
mle <- fit_elife(
time = time,
time2 = time2,
event = event,
thresh = thresh,
ltrunc = ltrunc,
rtrunc = rtrunc,
type = type,
family = "gp",
export = TRUE,
weights = weights
)
mle$mdat <- max(c(mle$time, mle$time2), na.rm = TRUE)
}
if (is.null(psi)) {
psi <- seq(-0.99, 2, length = 100L)
} else {
stopifnot(
"`psi` should be a numeric vector" = is.numeric(psi),
"Grid of values for `psi` do not include the maximum likelihood estimate." = min(
psi
) <
mle$par[2] &
max(psi) > mle$par[2]
)
}
mdat <- mle$mdat
dev <- vapply(
psi,
function(xi) {
opt <- optimize(
f = function(lambda) {
nll_elife(
par = c(lambda, xi),
time = time,
time2 = time2,
event = event,
thresh = thresh,
type = type,
ltrunc = ltrunc,
rtrunc = rtrunc,
family = "gp",
weights = weights
)
},
interval = c(ifelse(xi < 0, mdat * abs(xi), 1e-8), 10 * mdat),
tol = 1e-10
)
c(-2 * opt$objective, opt$minimum)
},
FUN.VALUE = numeric(2)
)
prof <- list(
psi = psi,
lambda = dev[2, ],
pll = -2 * mle$loglik + dev[1, ],
maxpll = 0,
mle = mle$par,
psi.max = mle$par['shape'],
std.error = sqrt(mle$vcov[2, 2])
)
if (confint) {
conf_interv(prof, level = level, print = FALSE)
} else {
prof
}
}
#' Profile log likelihood for the transformed scale parameter of the generalized Pareto distribution
#'
#' This internal function is used to produce threshold stability plots.
#'
#' @inheritParams nll_elife
#' @param mle an object of class \code{elife_par}
#' @param confint logical; if \code{TRUE} (default), return confidence intervals rather than list
#' @param level level of the confidence interval
#' @keywords internal
#' @export
#' @return a confidence interval or a list with profile values
prof_gp_scalet <-
function(
mle = NULL,
time,
time2 = NULL,
event = NULL,
thresh = 0,
ltrunc = NULL,
rtrunc = NULL,
type = c("right", "left", "interval", "interval2"),
level = 0.95,
psi = NULL,
weights = NULL,
confint = TRUE,
arguments = NULL,
...
) {
if (!is.null(arguments)) {
call <- match.call(expand.dots = FALSE)
arguments <- check_arguments(
func = prof_gp_scalet,
call = call,
arguments = arguments
)
return(do.call(prof_gp_scalet, args = arguments))
}
if (is.null(weights)) {
weights <- rep(1, length(time))
}
type <- match.arg(type)
stopifnot(
"Provide a single value for the level" = length(level) == 1L,
"Level should be a probability" = level < 1 && level > 0,
"Provide a single threshold" = length(thresh) == 1L
)
if (!is.null(mle)) {
stopifnot(
"`mle` should be an object of class `elife_par` as returned by `fit_elife`" = inherits(
mle,
"elife_par"
)
)
} else {
mle <- fit_elife(
time = time,
time2 = time2,
event = event,
thresh = thresh,
ltrunc = ltrunc,
rtrunc = rtrunc,
type = type,
family = "gp",
export = TRUE,
weights = weights
)
mle$mdat <- max(c(mle$time, mle$time2), na.rm = TRUE)
}
mdat <- mle$mdat
sigma_t_mle <- mle$par[1] - mle$par[2] * thresh
dV <- matrix(c(1, -thresh), ncol = 1)
sigma_t_se <- sqrt(as.numeric(t(dV) %*% mle$vcov %*% dV))
if (is.null(psi)) {
psi <- sigma_t_mle +
seq(-5 * sigma_t_se, 10 * sigma_t_se, length.out = 101)
} else {
stopifnot(
"`psi` should be a numeric vector" = is.numeric(psi),
"Grid of values for `psi` do not include the maximum likelihood estimate." = min(
psi
) <
sigma_t_mle &
max(psi) > sigma_t_mle
)
}
psi <- psi[psi > 0]
# Optimize is twice as fast as Rsolnp...
dev <- t(vapply(
psi,
function(scalet) {
opt <- optimize(
f = function(xi) {
nll_elife(
par = c(scalet + xi * thresh, xi),
time = time,
time2 = time2,
event = event,
thresh = thresh,
type = type,
ltrunc = ltrunc,
rtrunc = rtrunc,
family = "gp",
weights = weights
)
},
interval = c(
pmax(-1, ifelse(thresh == 0, -scalet / thresh, -scalet / mdat)),
3
),
tol = 1e-10
)
c(-2 * opt$objective, opt$minimum)
},
FUN.VALUE = numeric(2)
))
prof <- list(
psi = psi,
pll = -2 * mle$loglik + dev[, 1],
maxpll = 0,
mle = mle$par,
psi.max = sigma_t_mle,
std.error = sigma_t_se
)
if (confint) {
conf_interv(prof, level = level, print = FALSE)
} else {
prof
}
}
#' Profile log likelihood for the scale parameter of the exponential distribution
#'
#' This internal function is used to produce threshold stability plots.
#'
#' @inheritParams nll_elife
#' @param mle an object of class \code{elife_par}
#' @param confint logical; if \code{TRUE} (default), return confidence intervals rather than list
#' @param level level of the confidence interval
#' @export
#' @return a vector of length three with the maximum likelihood of the scale and profile-based confidence interval
#' @keywords internal
prof_exp_scale <- function(
mle = NULL,
time,
time2 = NULL,
event = NULL,
thresh = 0,
ltrunc = NULL,
rtrunc = NULL,
type = c("right", "left", "interval", "interval2"),
level = 0.95,
psi = NULL,
weights = NULL,
confint = TRUE,
arguments = NULL,
...
) {
if (!is.null(arguments)) {
call <- match.call(expand.dots = FALSE)
arguments <- check_arguments(
func = prof_exp_scale,
call = call,
arguments = arguments
)
return(do.call(prof_exp_scale, args = arguments))
}
type <- match.arg(type)
stopifnot(
"Provide a single value for the level" = length(level) == 1L,
"Level should be a probability" = level < 1 && level > 0,
"Provide a single threshold" = length(thresh) == 1L
)
if (is.null(weights)) {
weights <- rep(1, length(time))
}
if (!is.null(mle)) {
stopifnot(
"`mle` should be an object of class `elife_par` as returned by `fit_elife`" = inherits(
mle,
"elife_par"
)
)
} else {
mle <- fit_elife(
time = time,
time2 = time2,
event = event,
thresh = thresh,
ltrunc = ltrunc,
rtrunc = rtrunc,
type = type,
family = "exp",
weights = weights
)
}
if (is.null(psi)) {
psi <- mle$par +
seq(
pmax(-mle$par + 1e-4, -4 * mle$std.error),
4 * mle$std.error,
length.out = 201
)
} else {
stopifnot(
"`psi` should be a numeric vector" = is.numeric(psi),
"Grid of values for `psi` do not include the maximum likelihood estimate." = min(
psi
) <
mle$par &
max(psi) > mle$par
)
}
pll <- vapply(
psi,
function(scale) {
nll_elife(
par = scale,
time = time,
time2 = time2,
event = event,
thresh = thresh,
type = type,
ltrunc = ltrunc,
rtrunc = rtrunc,
family = "exp",
weights = weights
)
},
FUN.VALUE = numeric(1)
)
prof <- list(
psi = psi,
pll = -2 * (mle$loglik + pll),
maxpll = 0,
psi.max = mle$par,
std.error = mle$std.error,
mle = mle$par
)
if (confint) {
conf_interv(prof, level = level)
} else {
prof
}
}
#' Confidence intervals for profile likelihoods
#'
#' This code is adapted from the \code{mev} package.
#' @param object a list containing information about the profile likelihood in the same format as the \code{hoa} package
#' @param level probability level of the confidence interval
#' @param prob vector of length 2 containing the bounds, by default double-sided
#' @param print logical indicating whether the intervals are printed to the console
#' @param ... additional arguments passed to the function
#' @return a table with confidence intervals.
#' @keywords internal
conf_interv <- function(
object,
level = 0.95,
prob = c((1 - level) / 2, 1 - (1 - level) / 2),
print = FALSE,
...
) {
if (!isTRUE(all.equal(diff(prob), level, check.attributes = FALSE))) {
warning("Incompatible arguments: `level` does not match `prob`.")
}
args <- list(...)
if ("warn" %in% names(args) && is.logical(args$warn)) {
warn <- args$warn
} else {
warn <- TRUE
}
if (length(prob) != 2) {
stop("`prob` must be a vector of size 2")
prob <- sort(prob)
}
qulev <- qnorm(1 - prob)
conf <- rep(0, 3)
if (is.null(object$pll) && is.null(object$r)) {
stop(
"Object should contain arguments `pll` or `r` in order to compute confidence intervals."
)
}
if (is.null(object$r)) {
object$r <- sign(object$psi.max - object$psi) *
sqrt(2 * (object$maxpll - object$pll))
} else {
object$r[is.infinite(object$r)] <- NA
}
if (is.null(object$normal)) {
object$normal <- c(object$psi.max, object$std.error)
}
fit.r <- stats::smooth.spline(
x = na.omit(cbind(object$r, object$psi)),
cv = FALSE
)
pr <- predict(fit.r, c(0, qulev))$y
pr[1] <- object$normal[1]
conf <- pr
if (warn) {
if (!any(object$r > qnorm(prob[1]))) {
warning(
"Extrapolating the lower confidence interval for the profile likelihood ratio test"
)
}
if (!any(object$r < qnorm(prob[2]))) {
warning(
"Extrapolating the upper confidence interval for the profile likelihood ratio test"
)
}
}
names(conf) <- c("Estimate", "Lower CI", "Upper CI")
conf[2] <- ifelse(conf[2] > conf[1], NA, conf[2])
conf[3] <- ifelse(conf[3] < conf[1], NA, conf[3])
if (print) {
cat("Point estimate for the parameter of interest psi:\n")
cat("Maximum likelihood :", round(object$psi.max, 3), "\n")
cat("\n")
cat("Confidence intervals, levels :", prob, "\n")
cat(
"Wald intervals :",
round(
object$psi.max +
sort(qulev) * object$std.error,
3
),
"\n"
)
cat("Profile likelihood :", round(conf[2:3], 3), "\n")
}
return(invisible(conf))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.