#' Simulating responses from a GAM object
#'
#' @description This method can be used to simulate vectors of responses from a gamObject.
#'
#' @param object the output of a \code{gam()} or \code{bam()} call.
#' @param nsim the number of simulated vectors of responses. A positive integer.
#' @param seed numeric; a random seed for the simulations.
#' @param method the method used for the simulation. If set to "rd" then \code{o$family$rd()} will be used, if available. If set to "qf" then \code{o$family$qf()} (which is the inverse cdf of the response distribution) will be used to transform some uniform variates.
#' @param trans function used to transform or summarize each vector of simulated responses. It must take a vector as argument, but it can output a vector or a scalar. Potentially useful for saving storage (e.g. by transforming each simulated vector to a scalar). If left to \code{NULL} then \code{trans = identity} will be used.
#' @param newdata Optional new data frame or list to be passed to \link{predict.gam}.
#' @param u a matrix where each row is a vector of uniform random variables in (0, 1). This will be used to simulate responses only if \code{method = "qf"}.
#' @param w vector of prior weights to be used in the simulations. If \code{newdata==NULL} then \code{w} is set to \code{object$prior.weights} otherwise it is a vector of ones.
#' @param offset numeric vector of offsets. For GAMs with multiple linear predictor (see eg \link{gaulss}) it must be a list of vectors. NB: if \code{newdata!=NULL} the offsets will be assumed to be zero, unless their are explicitly provided. If \code{newdata==NULL} then simulations will use the offsets used during model fitting, and \code{offset} argument will be ignored.
#' @param ... currently not used.
#' @return A matrix where each column is a vector of simulated responses. The number of rows is equal to the number of responses in the fitted object.
#' @examples
#' library(mgcv)
#'
#' set.seed(2) ## simulate some data...
#' dat <- gamSim(1,n=400,dist="normal",scale=2)
#' b <- gam(y~s(x0)+s(x1)+s(x2)+s(x3),data=dat)
#'
#' # Simulate three vectors of responses
#' matplot(simulate(b, nsim = 3), pch = 19, col = c(1, 3, 4))
#'
#' @importFrom stats simulate
#' @importFrom plyr raply aaply laply rlply
#' @importFrom mgcv rmvn
#' @export simulate.gam
#' @export
#' @author based on mgcViz::simulate.gam by Matteo Fasiolo
#'
simulate.gam <- function(object, nsim = 1, seed = NULL, method = "auto",
newdata, u = NULL, w = NULL, offset = NULL,
trans = NULL, ...){
# set seed
# taken from gratia::simulate.gam by Gavin Simpson
if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE)) {
runif(1)
}
if (is.null(seed)) {
RNGstate <- get(".Random.seed", envir = .GlobalEnv)
} else {
R.seed <- get(".Random.seed", envir = .GlobalEnv)
set.seed(seed)
RNGstate <- structure(seed, kind = as.list(RNGkind()))
on.exit(assign(".Random.seed", R.seed, envir = .GlobalEnv))
}
o <- object
method <- match.arg(method, c("auto", "rd", "qf"))
if ( is.null(o$sig2) ){ o$sig2 <- summary(o)$dispersion }
if( is.null(trans) ) { trans <- identity }
# Either (a) use data in GAM object or (b) predict using new data
if( missing(newdata) ){ # (a) the offset should already be included in o$fitted.values
mu <- o$fitted.values
if( is.null(w) ) { w <- o$prior.weights }
if( !is.null(offset) ) { message("simulate.gam: offset argument ignored. No newdata provided, so offset is already in object$fitted.values")}
} else{ # (b) the user-defined offset is added to linear predictor
mu <- predict(o, newdata = newdata, type = "link")
if( is.null(w) ){ w <- mu*0 + 1 }
# Dealing with offset and inverting link function
form <- o$formula
if( is.list(form) ){ # [1] GAMLSS case
n <- length( mu[[1]] )
nte <- length( form )
lnki <- lapply(o$family$linfo, "[[", "linkinv")
if( is.null(offset) ) { offset <- rlply(nte, { numeric(n) }) }
mu <- t( laply(1:nte, function(.ii){ lnki[[.ii]](mu[ , .ii] + offset[[.ii]]) }) )
} else { # [2] GAM case
if( is.null(offset) ) { offset <- mu * 0 }
mu <- o$family$linkinv( mu + offset )
}
}
# Special cases: return straight away
if(o$family$family == "Multivariate normal"){
out <- rlply(nsim, function(.nouse) rmvn(nrow(mu), mu, solve(crossprod(o$family$data$R))))
return( out )
}
out <- .simulate.gam(mu = mu, w = w, sig = o$sig2, method = method,
fam = o$family, nsim = nsim, u = u, trans = trans)
# We want nsim rows and number of columns depending on trans()
if( is.vector(out) ) {
out <- as.matrix( out )
if(nsim == 1) { out <- t(out) }
}
return( t(unname(out)) )
}
# Internal function to simulate observations for given family
#
#' @importFrom mgcv fix.family.qf
#' @importFrom stats runif predict
.simulate.gam <- function(mu, w, sig, method, fam, nsim, u, trans) {
# Try method == 'rd', if that does not work 'qf', if that is not good either throw an error
if( method == "auto" ){
fam <- fix_family_rd(fam)
if( is.null(fam$rd) ) {
fam <- fix.family.qf(fam)
method <- 'qf'
} else {
method <- "rd"
}
if( method=="qf" && is.null(fam$qf) ) { stop("No simulation method available for this family") }
}
if( method == "rd" ){
if( is.null(fam$rd) ) { fam <- fix_family_rd(fam) }
if( is.null(fam$rd) ) { stop( "fam$rd unavailable, try using method = `qf`") }
sim <- raply(nsim, { trans(fam$rd(mu, w, sig)) } )
}
if( method == "qf" ){
if( is.null(fam$qf) ) { fam <- fix.family.qf(fam) }
if( is.null(fam$qf) ) { stop( "fam$qf unavailable, try using method = `rd`") }
nobs <- length( mu )
sim <- if( is.null(u) ){
raply(nsim, {
trans(fam$qf(runif(nobs), mu, w, sig))
} )
} else {
aaply(u, 1, function(.u) { trans(fam$qf(.u)) }, mu = mu, wt = w, scale = sig)
}
}
return( sim )
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.