Nothing
#' Point-wise and simultaneous confidence intervals for derivatives of smooths
#'
#' Calculates point-wise confidence or simultaneous intervals for the first
#' derivatives of smooth terms in a fitted GAM.
#'
#' @param object an object of class `"fderiv"` containing the estimated
#' derivatives.
#' @param parm which parameters (smooth terms) are to be given intervals as a
#' vector of terms. If missing, all parameters are considered.
#' @param level numeric, `0 < level < 1`; the confidence level of the
#' point-wise or simultaneous interval. The default is `0.95` for a 95%
#' interval.
#' @param type character; the type of interval to compute. One of `"confidence"`
#' for point-wise intervals, or `"simultaneous"` for simultaneous intervals.
#' @param nsim integer; the number of simulations used in computing the
#' simultaneous intervals.
#' @param ncores number of cores for generating random variables from a
#' multivariate normal distribution. Passed to [mvnfast::rmvn()].
#' Parallelization will take place only if OpenMP is supported (but appears
#' to work on Windows with current `R`).
#' @param ... additional arguments for methods
#'
#' @return a data frame with components:
#' 1. `term`; factor indicating to which term each row relates,
#' 2. `lower`; lower limit of the confidence or simultaneous interval,
#' 3. `est`; estimated derivative
#' 4. `upper`; upper limit of the confidence or simultaneous interval.
#'
#' @author Gavin L. Simpson
#'
#' @export
#'
#' @examples
#' load_mgcv()
#' \dontshow{
#' op <- options(pillar.sigfig = 2, cli.unicode = FALSE)
#' }
#' dat <- data_sim("eg1", n = 1000, dist = "normal", scale = 2, seed = 2)
#' mod <- gam(y ~ s(x0) + s(x1) + s(x2) + s(x3), data = dat, method = "REML")
#'
#' # new data to evaluate the derivatives at, say over the middle 50% of range
#' # of each covariate
#' middle <- function(x, n = 25, coverage = 0.5) {
#' v <- (1 - coverage) / 2
#' q <- quantile(x, prob = c(0 + v, 1 - v), type = 8)
#' seq(q[1], q[2], length = n)
#' }
#' new_data <- sapply(dat[c("x0", "x1", "x2", "x3")], middle)
#' new_data <- data.frame(new_data)
#' ## first derivatives of all smooths...
#' fd <- fderiv(mod, newdata = new_data)
#'
#' ## point-wise interval
#' ci <- confint(fd, type = "confidence")
#' ci
#'
#' ## simultaneous interval for smooth term of x2
#' \dontshow{
#' set.seed(42)
#' }
#' x2_sint <- confint(fd,
#' parm = "x2", type = "simultaneous",
#' nsim = 10000, ncores = 2
#' )
#' \donttest{
#' x2_sint
#' }
#' \dontshow{
#' options(op)
#' }
`confint.fderiv` <- function(object, parm, level = 0.95,
type = c("confidence", "simultaneous"),
nsim = 10000,
ncores = 1L, ...) {
## Process arguments
## parm is one of the terms in object
parm <- if (missing(parm)) {
object$terms
} else {
parm <- add_s(parm)
terms <- object$terms
want <- parm %in% terms
if (any(!want)) {
msg <- paste(
"Terms:", paste(parm[!want], collapse = ", "),
"not found in `object`"
)
stop(msg)
}
parm[want]
}
## parm <- add_s(parm)
## level should be length 1, numeric and 0 < level < 1
if ((ll <- length(level)) > 1L) {
warning(paste(
"`level` should be length 1, but supplied length: ",
ll, ". Using the first only."
))
level <- rep(level, length.out = 1L)
}
if (!is.numeric(level)) {
stop(paste("`level` should be numeric, but supplied:", level))
}
if (!(0 < level) && (level < 1)) {
stop(paste(
"`level` should lie in interval [0,1], but supplied:",
level
))
}
## which type of interval is required
type <- match.arg(type)
## generate intervals
interval <- if (type == "confidence") {
confidence(object, terms = parm, level = level)
} else {
simultaneous(object,
terms = parm, level = level, nsim = nsim,
ncores = ncores
)
}
interval <- as_tibble(interval)
class(interval) <- c("confint.fderiv", class(interval))
## return
interval
}
#' @importFrom stats quantile vcov
#' @importFrom mvnfast rmvn
`simultaneous` <- function(x, terms, level, nsim, ncores) {
## wrapper the computes each interval
`simInt` <- function(x, Vb, bu, level, nsim) {
Xi <- x[["Xi"]] # derivative Lp, zeroed except for this term
se <- x[["se.deriv"]] # std err of deriv for current term
d <- x[["deriv"]] # deriv for current term
simDev <- Xi %*% t(bu) # simulate deviations from expected
absDev <- abs(sweep(simDev, 1, se, FUN = "/")) # absolute deviations
masd <- apply(absDev, 2L, max) # & maxabs deviation per sim
## simultaneous interval critical value
crit <- quantile(masd, prob = level, type = 8)
## return as data frame
data.frame(lower = d - (crit * se), est = d, upper = d + (crit * se))
}
## bayesian covar matrix, possibly accounting for estimating smooth pars
Vb <- vcov(x[["model"]], unconditional = x$unconditional)
## simulate un-biased deviations given bayesian covar matrix
buDiff <- mvnfast::rmvn(
n = nsim, mu = rep(0, nrow(Vb)), sigma = Vb,
ncores = ncores
)
## apply wrapper to compute simultaneous interval critical value and
## corresponding simultaneous interval for each term
res <- lapply(x[["derivatives"]][terms],
FUN = simInt,
Vb = Vb, bu = buDiff, level = level, nsim = nsim
)
## how many values per term - currently all equal
lens <- vapply(res, FUN = NROW, FUN.VALUE = integer(1))
res <- do.call("rbind", res) # row-bind each component of res
res <- cbind(term = rep(terms, times = lens), res) # add on term ID
rownames(res) <- NULL # tidy up
res # return
}
#' @importFrom stats qnorm
`confidence` <- function(x, terms, level) {
## wrapper the computes each interval
`confInt` <- function(x, level) {
se <- x[["se.deriv"]] # std err of deriv for current term
d <- x[["deriv"]] # deriv for current term
## confidence interval critical value
crit <- qnorm(1 - ((1 - level) / 2))
## return as data frame
data.frame(lower = d - (crit * se), est = d, upper = d + (crit * se))
}
## apply wrapper to compute confidence interval critical value and
## corresponding confidence interval for each term
res <- lapply(x[["derivatives"]][terms], FUN = confInt, level = level)
## how many values per term - currently all equal
lens <- vapply(res, FUN = NROW, FUN.VALUE = integer(1))
res <- do.call("rbind", res) # row-bind each component of res
res <- cbind(term = rep(terms, times = lens), res) # add on term ID
rownames(res) <- NULL # tidy up
res # return
}
#' Point-wise and simultaneous confidence intervals for smooths
#'
#' Calculates point-wise confidence or simultaneous intervals for the smooth
#' terms of a fitted GAM.
#'
#' @param object an object of class `"gam"` or `"gamm"`.
#' @param parm which parameters (smooth terms) are to be given intervals as a
#' vector of terms. If missing, all parameters are considered, although this
#' is not currently implemented.
#' @param level numeric, `0 < level < 1`; the confidence level of the point-wise
#' or simultaneous interval. The default is `0.95` for a 95% interval.
#' @param data data frame; new values of the covariates used in the model fit.
#' The selected smooth(s) wil be evaluated at the supplied values.
#' @param n numeric; the number of points to evaluate smooths at.
#' @param type character; the type of interval to compute. One of `"confidence"`
#' for point-wise intervals, or `"simultaneous"` for simultaneous intervals.
#' @param nsim integer; the number of simulations used in computing the
#' simultaneous intervals.
#' @param shift logical; should the constant term be add to the smooth?
#' @param transform logical; should the smooth be evaluated on a transformed
#' scale? For generalised models, this involves applying the inverse of the
#' link function used to fit the model. Alternatively, the name of, or an
#' actual, function can be supplied to transform the smooth and it's
#' confidence interval.
#' @param unconditional logical; if `TRUE` (and `freq == FALSE`) then the
#' Bayesian smoothing parameter uncertainty corrected covariance matrix is
#' returned, if available.
#' @param ncores number of cores for generating random variables from a
#' multivariate normal distribution. Passed to [mvnfast::rmvn()].
#' Parallelization will take place only if OpenMP is supported (but appears
#' to work on Windows with current `R`).
#' @param partial_match logical; should matching `parm` use a partial match or
#' an exact match? Can only be used if `length(parm)` is `1`.
#' @param ... additional arguments for methods
#' @param newdata DEPRECATED! data frame; containing new values of the
#' covariates used in the model fit. The selected smooth(s) wil be evaluated
#' at the supplied values.
#'
#' @return a tibble with components:
#' 1. `.smooth`; character indicating to which term each row relates,
#' 2. `.type`; the type of smooth,
#' 3. `.by` the name of the by variable if a by smooth, `NA` otherwise,
#' 4. one or more vectors of values at which the smooth was evaluated, named as
#' per the variables in the smooth,
#' 5. zero or more variables containing values of the by variable,
#' 6. `.estimate`; estimated value of the smooth,
#' 7. `.se`; standard error of the estimated value of the smooth,
#' 8. `.crit`; critical value for the `100 * level`% confidence interval.
#' 9. `.lower_ci`; lower limit of the confidence or simultaneous interval,
#' 10. `.upper_ci`; upper limit of the confidence or simultaneous interval,
#'
#' @author Gavin L. Simpson
#'
#' @importFrom stats family qnorm
#' @importFrom mgcv PredictMat
#' @importFrom stats quantile vcov setNames
#' @importFrom dplyr bind_rows
#' @importFrom tibble add_column
#' @importFrom mvnfast rmvn
#' @importFrom tidyselect all_of last_col
#'
#' @export
#'
#' @examples
#' load_mgcv()
#' \dontshow{
#' op <- options(pillar.sigfig = 2, cli.unicode = FALSE)
#' }
#' dat <- data_sim("eg1", n = 1000, dist = "normal", scale = 2, seed = 2)
#' mod <- gam(y ~ s(x0) + s(x1) + s(x2) + s(x3), data = dat, method = "REML")
#'
#' # new data to evaluate the smooths at, say over the middle 50% of range
#' # of each covariate
#' middle <- function(x, n = 50, coverage = 0.5) {
#' v <- (1 - coverage) / 2
#' q <- quantile(x, prob = c(0 + v, 1 - v), type = 8)
#' seq(q[1], q[2], length = n)
#' }
#' new_data <- sapply(dat[c("x0", "x1", "x2", "x3")], middle)
#' new_data <- data.frame(new_data)
#'
#' ## point-wise interval for smooth of x2
#' ci <- confint(mod, parm = "s(x2)", type = "confidence", data = new_data)
#' ci
#' \dontshow{
#' options(op)
#' }
`confint.gam` <- function(object, parm, level = 0.95, data = newdata, n = 100,
type = c("confidence", "simultaneous"), nsim = 10000,
shift = FALSE, transform = FALSE,
unconditional = FALSE,
ncores = 1, partial_match = FALSE,
..., newdata = NULL) {
S <- smooths(object)
## select smooths
select <- check_user_select_smooths(
smooths = S,
select = parm,
partial_match = partial_match
)
## did it match anything?
if (!any(select)) {
stop("No smooths matched <", paste(parm, collapse = ", "),
">. Try adding `partial_match = TRUE`?",
call. = FALSE
)
}
take <- select & (smooth_dim(object) <= 2L)
S <- S[take]
## look to see if smooth is a by variable
by_levs <- NULL
is_by <- vapply(object[["smooth"]][take], is_by_smooth, logical(1L))
if (any(is_by)) {
# S <- vapply(strsplit(S, ":"), `[[`, character(1L), 1L)
by_levs <- vapply(object[["smooth"]][take], by_level, character(1L))
by_var <- vapply(object[["smooth"]][take], by_variable, character(1L))
}
## unique smooths (counts all levels of a by factor as a single smooth)
uS <- unique(S)
# warn if user uses newdata
if (!is.null(newdata)) {
newdata_deprecated()
}
## how many data points if data supplied
if (!is.null(data)) {
n <- NROW(data)
}
ilink <- if (is.logical(transform)) { # transform is logical
if (isTRUE(transform)) { # transform == TRUE
family(object)$linkinv
} else { # transform == FALSE
function(eta) {
eta
}
}
} else if (!is.null(transform)) { # transform is a fun
match.fun(transform)
}
## which type of confidence interval
type <- match.arg(type)
## list to hold results
out <- vector("list", length = length(uS)) # list for results
if (isTRUE(type == "confidence")) {
for (i in seq_along(out)) {
out[[i]] <- smooth_estimates(object,
select = uS[i],
n = n, data = data, partial_match = partial_match
)
out[[i]][[".crit"]] <- coverage_normal(level)
}
} else {
## function to do simultaneous intervals for a smooth
## this should be outlined as an actual function...
## @param smooth list; the individual smooth to work on
## @param level numeric; the confidence level
## @param data dataframe; values to compute confidence interval at
sim_interval <- function(smooth, level, data) {
start <- smooth[["first.para"]]
end <- smooth[["last.para"]]
para.seq <- start:end
Cg <- PredictMat(smooth, data)
simDev <- Cg %*% t(buDiff[, para.seq])
absDev <- abs(sweep(simDev, 1L, data[[".se"]], FUN = "/"))
masd <- apply(absDev, 2L, max)
unname(quantile(masd, probs = level, type = 8))
}
## need VCOV for simultaneous intervals
V <- get_vcov(object, unconditional = unconditional)
## simulate un-biased deviations given bayesian covar matrix
buDiff <- mvnfast::rmvn(
n = nsim, mu = rep(0, nrow(V)), sigma = V,
ncores = ncores
)
## loop over smooths
for (i in seq_along(out)) {
## evaluate smooth
out[[i]] <- smooth_estimates(object,
select = uS[i],
n = n, data = data, partial_match = partial_match
)
# if this is a by var smooth, we need to do this for each level of
# by var
# I think this can now be simplified to just be the code in the
# else branch as smooth_estimates knows how to handle very specific
# smooth names FIXME
if (is.null(by_levs)) { # not by variable smooth
smooth <- get_smooth(object, parm) # get the specific smooth
crit <- sim_interval(smooth, level = level, data = out[[i]])
} else { # is a by variable smooth
smooth <- old_get_smooth(object, uS[i])
crit <- sim_interval(smooth, level = level, data = out[[i]])
}
out[[i]][[".crit"]] <- crit # add on the critical value
}
}
if (shift) {
const <- coef(object)
nms <- names(const)
test <- grep("Intercept", nms)
const <- ifelse(length(test) == 0L, 0, const[test])
} else {
const <- 0
}
## simplify to a data frame for return
out <- bind_rows(out)
# This was needed with `[.evaluated_smooth` before switching to
# NextMethod() to call the next S3 `[` method.
# See: https://github.com/tidyverse/tibble/issues/511#issuecomment-431225229
# Note needed, it seems now that NextMethod() is used but extending tibbles
# is not currently well documented.
# class(out) <- class(out)[-(1:2)]
## using se and crit, compute the lower and upper intervals
# out <- add_column(out,
# .lower_ci = out$.estimate - (out$.crit * out$.se),
# .upper_ci = out$.estimate + (out$.crit * out$.se)
# )
out <- mutate(out,
.estimate = .data$.estimate + const
) |>
mutate(
.lower_ci = .data$.estimate - (.data$.crit * .data$.se),
.upper_ci = .data$.estimate + (.data$.crit * .data$.se)
)
## transform
out <- out |>
mutate(across(all_of(c(".estimate", ".lower_ci", ".upper_ci")),
.fns = ilink
))
# smooth_estimates has columns in different places, relocate them to match
# old output as much as possible
out <- relocate(out, all_of(c(
".estimate", ".se", ".crit", ".lower_ci", ".upper_ci"
)), .after = last_col())
## prepare for return
class(out) <- c("confint.gam", class(out))
out # return
}
#' @rdname confint.gam
#'
#' @importFrom stats confint
#'
#' @export
`confint.gamm` <- function(object, ...) {
confint(object[["gam"]], ...)
}
#' @rdname confint.gam
#'
#' @importFrom stats confint
#'
#' @export
`confint.list` <- function(object, ...) {
if (!is_gamm4(object)) {
stop("`object` does not appear to a `gamm4` model object",
call. = FALSE
)
}
confint(object[["gam"]], ...)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.