#' Generate regular data over the covariates of a smooth
#'
#' @param model a fitted model
#' @param id the number ID of the smooth within `model` to process.
#' @param n numeric; the number of new observations to generate.
#' @param n_2d numeric; the number of new observations to generate for the
#' second dimension of a 2D smooth. *Currently ignored*.
#' @param n_3d numeric; the number of new observations to generate for the third
#' dimension of a 3D smooth.
#' @param n_4d numeric; the number of new observations to generate for the
#' dimensions higher than 2 (!) of a *k*D smooth (*k* >= 4). For example, if
#' the smooth is a 4D smooth, each of dimensions 3 and 4 will get `n_4d`
#' new observations.
#' @param offset numeric; value of the model offset to use.
#' @param include_all logical; include all covariates involved in the smooth?
#' if `FALSE`, only the covariates involved in the smooth will be included in
#' the returned data frame. If `TRUE`, a representative value will be included
#' for all other covariates in the model that aren't actually used in the
#' smooth. This can be useful if you want to pass the returned data frame on
#' to [mgcv::PredictMat()].
#' @param var_order character; the order in which the terms in the smooth
#' should be processed. Only useful for tensor products with at least one
#' 2d marginal smooth.
#'
#' @export
#'
#' @importFrom dplyr bind_cols setdiff
#' @importFrom tibble as_tibble
#' @importFrom rlang exec !!!
#' @importFrom tidyr expand_grid
#'
#' @examples
#' \dontshow{
#' op <- options(cli.unicode = FALSE, pillar.sigfig = 4)
#' }
#' load_mgcv()
#' df <- data_sim("eg1", seed = 42)
#' m <- bam(y ~ s(x0) + s(x1) + s(x2) + s(x3), data = df)
#'
#' # generate data over range of x1 for smooth s(x1)
#' smooth_data(m, id = 2)
#'
#' # generate data over range of x1 for smooth s(x1), with typical value for
#' # other covariates in the model
#' smooth_data(m, id = 2, include_all = TRUE)
#'
#' \dontshow{
#' options(op)
#' }
`smooth_data` <- function(model, id, n = 100, n_2d = NULL, n_3d = NULL,
n_4d = NULL, offset = NULL, include_all = FALSE,
var_order = NULL) {
mf <- model.frame(model) # model.frame used to fit model
## remove response
respvar <- attr(model$terms, "response")
if (!identical(respvar, 0)) {
mf <- mf[, -respvar, drop = FALSE]
}
# remove offset() var; model.frame returns both `offset(foo(var))` and
# `var`, so we can just remove the former, but we also want to set the
# offset variable `var` to something constant. FIXME
if (is.null(offset)) {
offset <- 1L
}
mf <- fix_offset(model, mf, offset_val = offset)
ff <- vapply(mf, is.factor, logical(1L)) # which, if any, are factors vars
## list of model terms (variable names); extract these from `var.summary`
## because model.frame() on a gamm() contains extraneous variables, related
## to the mixed model form for lme()
m.terms <- names(model[["var.summary"]])
## need a list of terms used in current smooth
sm <- get_smooths_by_id(model, id)[[1L]]
orig_order <- unique(smooth_variable(sm))
smooth_vars <- if (is.null(var_order)) {
orig_order
} else {
var_order
}
## is smooth a by? If it is, extract the by variable
by_var <- if (is_by_smooth(sm)) {
by_variable(sm)
} else {
NULL
}
used_vars <- c(smooth_vars, by_var)
# Figure out the n to use for each dimension
sm_dim <- smooth_dim(sm)
# fix up the n, n_3d, n_4d. If `n_3d` is `NULL` set `n_3d <- n`
if (is.null(n_3d)) {
n_3d <- n
}
# likewise fix up n_4d; set it to `n` if `n_4d` is NULL
if (is.null(n_4d)) {
n_4d <- n
}
seq_per_dim <- function(data, vars, dim, n, n_3d, n_4d) {
n_per_dim <- rep(n, dim)
if (dim == 3L) {
n_per_dim[3] <- n_3d
} else if (dim > 3L) {
n_per_dim[seq_len(dim - 2) + 2] <- n_4d
}
seq_min_max_wrapper <- function(i, data, vars, n) {
# if dim >= 2 we want n_3d or n_4d pretty values, but pretty()
# won't return exactly the right n
out <- seq_min_max(data[[vars[i]]], n = n[i])
# now that we have the ordering of vars corrected we can round here
if (i > 2L && !(is.factor(data[[vars[i]]]))) {
out <- round(out, 3)
}
out
out
}
out <- lapply(seq_along(vars),
FUN = seq_min_max_wrapper,
data = data, vars = vars, n = n_per_dim
)
out <- setNames(out, vars)
out
}
## generate covariate values for the smooth
# newlist <- lapply(mf[smooth_vars], seq_min_max, n = n)
newlist <- seq_per_dim(
data = mf, vars = smooth_vars, dim = sm_dim,
n = n, n_3d = n_3d, n_4d = n_4d
)
if (!is.null(by_var)) {
if (is_factor_by_smooth(sm)) {
## ordered or simple factor? Grab class as a function to apply below
FUN <- match.fun(data.class(mf[[by_var]]))
## extract levels of factor by var,
levs <- levels(mf[[by_var]])
## coerce level for this smooth to correct factor type with FUN
## return as a list with the correct names
newfac <- setNames(list(FUN(by_level(sm), levels = levs)), by_var)
## append this list to the list of new smooth covariate values
newlist <- append(newlist, newfac)
} else {
## continuous by var; set to median among observed values?
## newby <- setNames(list(median(mf[[by_var]]), na.rm = TRUE), by_var)
## no, should be set to 1
newby <- setNames(list(1L), by_var)
newlist <- append(newlist, newby)
}
}
newdata <- exec(expand_grid, !!!newlist) # compute expand.grid-alike
if (isTRUE(include_all)) {
## need to provide single values for all other covariates in data
unused_vars <- dplyr::setdiff(m.terms, used_vars)
## only processed unused_vars if length() > 0L
if (length(unused_vars) > 0L) {
unused_summ <- model[["var.summary"]][unused_vars]
## FIXME: put this in utils.R with a better name!
## this basically just reps the data (scalar) for the closest
## observation to the median over all observations
`rep_fun` <- function(x, n) {
## if `x` isn't a factor, select the second element of `x` which
## is the value of the observation in the data closest to median
## of set of observations in data used to fit the model.
if (!is.factor(x)) {
x <- x[2L]
}
## repeat `x` as many times as is needed
rep(x, times = n)
}
n_new <- NROW(newdata)
unused_data <- as_tibble(lapply(unused_summ,
FUN = rep_fun,
n = n_new
))
## add unnused_data to newdata so we're ready to predict
newdata <- bind_cols(newdata, unused_data)
}
}
newdata # return
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.