##' Point-wise and simultaneous confidence intervals for derivatives of smooths
##'
##' Calculates point-wise confidence or simultaneous intervals for the first derivatives of smooth terms in a fitted GAM.
##'
##' @param object an object of class `"fderiv"` containing the estimated derivatives.
##' @param parm which parameters (smooth terms) are to be given intervals as a vector of terms. If missing, all parameters are considered.
##' @param level numeric, `0 < level < 1`; the confidence level of the point-wise or simultaneous interval. The default is `0.95` for a 95\% interval.
##' @param type character; the type of interval to compute. One of `"confidence"` for point-wise intervals, or `"simultaneous"` for simultaneous intervals.
##' @param nsim integer; the number of simulations used in computing the simultaneous intervals.
##' @param ... additional arguments for methods
##'
##' @return a data frame with components:
##' 1. `term`; factor indicating to which term each row relates,
##' 2. `lower`; lower limit of the confidence or simultaneous interval,
##' 3. `est`; estimated derivative
##' 4. `upper`; upper limit of the confidence or simultaneous interval.
##'
##' @author Gavin L. Simpson
##'
##' @export
##'
##' @examples
##' library("mgcv")
##' set.seed(2)
##' dat <- gamSim(1, n = 400, dist = "normal", scale = 2)
##' mod <- gam(y ~ s(x0) + s(x1) + s(x2) + s(x3), data = dat, method = "REML")
##'
##' ## first derivatives of all smooths...
##' fd <- fderiv(mod)
##'
##' ## point-wise interval
##' ci <- confint(fd, type = "confidence")
##' head(ci)
##'
##' ## simultaneous interval for smooth term of x1
##' set.seed(42)
##' x1.sint <- confint(fd, parm = "x1", type = "simultaneous", nsim = 1000)
##' head(x1.sint)
`confint.fderiv` <- function(object, parm, level = 0.95,
type = c("confidence", "simultaneous"), nsim = 10000, ...) {
## Process arguments
## parm is one of the terms in object
parm <- if(missing(parm)) {
object$terms
} else {
parm <- add_s(parm)
terms <- object$terms
want <- parm %in% terms
if (any(!want)) {
msg <- paste("Terms:", paste(parm[!want], collapse = ", "),
"not found in `object`")
stop(msg)
}
parm[want]
}
## parm <- add_s(parm)
## level should be length 1, numeric and 0 < level < 1
if ((ll <- length(level)) > 1L) {
warning(paste("`level` should be length 1, but supplied length: ",
ll, ". Using the first only."))
level <- rep(level, length.out = 1L)
}
if (!is.numeric(level)) {
stop(paste("`level` should be numeric, but supplied:", level))
}
if (! (0 < level) && (level < 1)) {
stop(paste("`level` should lie in interval [0,1], but supplied:", level))
}
## which type of interval is required
type <- match.arg(type)
## generate intervals
interval <- if (type == "confidence") {
confidence(object, terms = parm, level = level)
} else {
simultaneous(object, terms = parm, level = level, nsim = nsim)
}
class(interval) <- c("confint.fderiv", "data.frame")
## return
interval
}
##' @importFrom stats quantile vcov
##' @importFrom MASS mvrnorm
`simultaneous` <- function(x, terms, level, nsim) {
## wrapper the computes each interval
`simInt` <- function(x, Vb, bu, level, nsim) {
Xi <- x[["Xi"]] # derivative Lp, zeroed except for this term
se <- x[["se.deriv"]] # std err of deriv for current term
d <- x[["deriv"]] # deriv for current term
simDev <- Xi %*% t(bu) # simulate deviations from expected
absDev <- abs(sweep(simDev, 1, se, FUN = "/")) # absolute deviations
masd <- apply(absDev, 2L, max) # & maxabs deviation per sim
## simultaneous interval critical value
crit <- quantile(masd, prob = level, type = 8)
## return as data frame
data.frame(lower = d - (crit * se), est = d, upper = d + (crit * se))
}
## bayesian covar matrix, possibly accounting for estimating smooth pars
Vb <- vcov(x[["model"]], unconditional = x$unconditional)
## simulate un-biased deviations given bayesian covar matrix
buDiff <- MASS::mvrnorm(n = nsim, mu = rep(0, nrow(Vb)), Sigma = Vb)
## apply wrapper to compute simultaneous interval critical value and
## corresponding simultaneous interval for each term
res <- lapply(x[["derivatives"]][terms], FUN = simInt,
Vb = Vb, bu = buDiff, level = level, nsim = nsim)
## how many values per term - currently all equal
lens <- vapply(res, FUN = NROW, FUN.VALUE = integer(1))
res <- do.call("rbind", res) # row-bind each component of res
res <- cbind(term = rep(terms, times = lens), res) # add on term ID
rownames(res) <- NULL # tidy up
res # return
}
##' @importFrom stats qnorm
`confidence` <- function(x, terms, level) {
## wrapper the computes each interval
`confInt` <- function(x, level) {
se <- x[["se.deriv"]] # std err of deriv for current term
d <- x[["deriv"]] # deriv for current term
## confidence interval critical value
crit <- qnorm(1 - ((1 - level) / 2))
## return as data frame
data.frame(lower = d - (crit * se), est = d, upper = d + (crit * se))
}
## apply wrapper to compute confidence interval critical value and
## corresponding confidence interval for each term
res <- lapply(x[["derivatives"]][terms], FUN = confInt, level = level)
## how many values per term - currently all equal
lens <- vapply(res, FUN = NROW, FUN.VALUE = integer(1))
res <- do.call("rbind", res) # row-bind each component of res
res <- cbind(term = rep(terms, times = lens), res) # add on term ID
rownames(res) <- NULL # tidy up
res # return
}
##' Point-wise and simultaneous confidence intervals for smooths
##'
##' Calculates point-wise confidence or simultaneous intervals for the smooth terms of a fitted GAM.
##'
##' @param object an object of class `"gam"` or `"gamm"`.
##' @param parm which parameters (smooth terms) are to be given intervals as a vector of terms. If missing, all parameters are considered, although this is not currently implemented.
##' @param level numeric, `0 < level < 1`; the confidence level of the point-wise or simultaneous interval. The default is `0.95` for a 95\% interval.
##' @param newdata data frame; containing new values of the covariates used in the model fit. The selected smooth(s) wil be evaluated at the supplied values.
##' @param n numeric; the number of points to evaluate smooths at.
##' @param type character; the type of interval to compute. One of `"confidence"` for point-wise intervals, or `"simultaneous"` for simultaneous intervals.
##' @param nsim integer; the number of simulations used in computing the simultaneous intervals.
##' @param shift logical; should the constant term be add to the smooth?
##' @param transform logical; should the smooth be evaluated on a transformed scale? For generalised models, this involves applying the inverse of the link function used to fit the model. Alternatively, the name of, or an actual, function can be supplied to transform the smooth and it's confidence interval.
##' @param unconditional logical; if `TRUE` (and `freq == FALSE`) then the Bayesian smoothing parameter uncertainty corrected covariance matrix is returned, if available.
##' @param ... additional arguments for methods
##'
##' @return a data frame with components:
##' 1. `term`; factor indicating to which term each row relates,
##' 2. `x`; the vector of values at which the smooth was evaluated,
##' 3. `lower`; lower limit of the confidence or simultaneous interval,
##' 4. `est`; estimated value of the smooth
##' 5. `upper`; upper limit of the confidence or simultaneous interval,
##' 6. `crit`; critical value for the `100 * level`% confidence interval.
##'
##' @author Gavin L. Simpson
##'
##' @importFrom stats family qnorm
##' @importFrom mgcv PredictMat
##' @importFrom stats quantile vcov setNames
##' @importFrom MASS mvrnorm
##'
##' @export
##'
##' @examples
##' library("mgcv")
##' set.seed(2)
##' dat <- gamSim(1, n = 400, dist = "normal", scale = 2)
##' mod <- gam(y ~ s(x0) + s(x1) + s(x2) + s(x3), data = dat, method = "REML")
##' ##'
##' ## point-wise interval
##' ci <- confint(mod, parm = "x1", type = "confidence")
##' head(ci)
##' ##'
##' ## simultaneous interval for smooth term of x1
##' set.seed(42)
##' si <- confint(mod, parm = "x1", type = "simultaneous", nsim = 100)
##' head(si)
`confint.gam` <- function(object, parm, level = 0.95, newdata = NULL, n = 200,
type = c("confidence", "simultaneous"), nsim = 10000,
shift = FALSE, transform = FALSE, unconditional = FALSE,
...) {
parm <- add_s(parm)
## parm <- select_smooth(object, parm) # select_terms(object, parm)
S <- smooths(object) # vector of smooth labels - "s(x)"
## which --- ie index --- smooths match parm
take <- which_smooth(object, parm)
S <- S[take]
## can only do confints for 1d smooths currently --- get smooth dimensions & prune list `S`
d <- smooth_dim(object)[take]
S <- S[d <= 1L]
## look to see if smooth is a by variable
by_levs <- NULL
is_by <- vapply(object[["smooth"]][take], is_by_smooth, logical(1L))
if (any(is_by)) {
S <- vapply(strsplit(S, ":"), `[[`, character(1L), 1L)
by_levs <- vapply(object[["smooth"]][take], by_level, character(1L))
by_var <- vapply(object[["smooth"]][take], by_variable, character(1L))
}
## unique smooths (counts all levels of a by factor as a single smooth)
uS <- unique(S)
## how many data points if newdata supplied
if (!is.null(newdata)) {
n <- NROW(newdata)
}
ilink <- if (is.logical(transform)) { # transform is logical
if (isTRUE(transform)) { # transform == TRUE
family(object)$linkinv
} else { # transform == FALSE
function(eta) { eta }
}
} else if (!is.null(transform)) { # transform is a fun
match.fun(transform)
}
## which type of confidence interval
type <- match.arg(type)
if (isTRUE(type == "simultaneous")) {
## need VCOV for simultaneous intervals
V <- get_vcov(object, unconditional = unconditional)
## simulate un-biased deviations given bayesian covar matrix
buDiff <- MASS::mvrnorm(n = nsim, mu = rep(0, nrow(V)), Sigma = V)
}
## list to hold results
out <- vector("list", length = length(uS)) # list for results
if (isTRUE(type == "confidence")) {
for (i in seq_along(out)) {
out[[i]] <- evaluate_smooth(object, uS[i], n = n, newdata = newdata)
out[[i]][["crit"]] <- qnorm((1 - level) / 2, lower.tail = FALSE)
}
} else {
## function to do simultaneous intervals for a smooth
## this should be outlined as an actual function...
## @param smooth list; the individual smooth to work on
## @param level numeric; the confidence level
## @param newdata dataframe; values to compute confidence interval at
sim_interval <- function(smooth, level, newdata) {
start <- smooth[["first.para"]]
end <- smooth[["last.para"]]
para.seq <- start:end
Cg <- PredictMat(smooth, newdata)
simDev <- Cg %*% t(buDiff[, para.seq])
absDev <- abs(sweep(simDev, 1L, newdata[["se"]], FUN = "/"))
masd <- apply(absDev, 2L, max)
quantile(masd, probs = level, type = 8)
}
## need VCOV for simultaneous intervals
V <- get_vcov(object, unconditional = unconditional)
## simulate un-biased deviations given bayesian covar matrix
buDiff <- MASS::mvrnorm(n = nsim, mu = rep(0, nrow(V)), Sigma = V)
## loop over smooths
for (i in seq_along(out)) {
## evaluate smooth
out[[i]] <- evaluate_smooth(object, uS[i], n = n, newdata = newdata)
## if this is a by var smooth, we need to do this for each level of by var
if (is.null(by_levs)) { # not by variable smooth
smooth <- get_smooth(object, parm) # get the specific smooth
crit <- sim_interval(smooth, level = level, newdata = out[[i]])
out[[i]][["crit"]] <- crit # add on the critical value for this smooth
} else { # is a by variable smooth
out[[i]][["crit"]] <- 0 # fill in a variable crit
smooth <- get_smooth(object, parm)
for (l in seq_along(by_levs)) {
ind <- out[[i]][[5L]] == by_levs[l] # which rows in evaulated smooth contain this levels data?
crit <- sim_interval(smooth[[l]], level = level, newdata = out[[i]][ind, ])
out[[i]][["crit"]][ind] <- crit # add on the critical value for this smooth
}
}
}
}
const <- coef(object)
nms <- names(const)
test <- grep("Intercept", nms)
const <- ifelse(length(test) == 0L, 0, const[test])
## simplify to a data frame for return
out <- do.call("rbind", out)
## using se and crit, compute the lower and upper intervals
out <- cbind(out,
lower = out[["est"]] - (out[["crit"]] * out[["se"]]),
upper = out[["est"]] + (out[["crit"]] * out[["se"]]))
## transform
out[, "est"] <- ilink(out[, "est"] + const)
out[, "lower"] <- ilink(out[, "lower"] + const)
out[, "upper"] <- ilink(out[, "upper"] + const)
## prepare for return
class(out) <- c("confint.gam", "data.frame")
out # return
}
##' @rdname confint.gam
##'
##' @importFrom stats confint
##'
##' @export
`confint.gamm` <- function(object, ...) {
confint(object[["gam"]], ...)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.