Nothing
#'
#' This function is probably redundant. Simply using \code{\link{simulate}}
#' generates data from the correct distribution for objects which inherit
#' class \code{\link[stats]{lm}}. The difference is that I'm trying to add the
#' uncertainty in the parameter estimates.
#'
#' @title Simulate responses from a generalized additive linear model \code{\link[mgcv]{gam}}
#' @name simulate_gam
#' @description By sampling from the vector of coefficients it is possible to simulate
#' data from a \sQuote{gam} model.
#' @param object object of class \code{\link[mgcv]{gam}} or \code{\link[stats]{glm}}.
#' @param psim parameter simulation level (an integer, 0, 1, 2 or 3).
#' @param nsim number of simulations to perform
#' @param resid.type type of residual to use. \sQuote{none}, \sQuote{resample}, \sQuote{normal} or \sQuote{wild}.
#' @param value either \sQuote{matrix} or \sQuote{data.frame}
#' @param ... additional arguments (none used at the moment)
#' @note psim = 3 is not implemented at the moment.
#' @return matrix or data.frame with responses
#' @details
#' These are the options that control the parameter simulation level
#' \describe{
#' \item{psim = 0}{returns the fitted values}
#' \item{psim = 1}{simulates from a beta vector (mean response)}
#' \item{psim = 2}{simulates observations according to the residual type (similar to observed data)}
#' \item{psim = 3}{simulates a beta vector, considers uncertainty in the variance covariance matrix of beta and adds residuals (prediction)}
#' }
#' The residual type (resid.type) controls how the residuals are generated.
#' They are either resampled, simulated from a normal distribution or \sQuote{wild} where the
#' Rademacher distribution is used (\url{https://en.wikipedia.org/wiki/Rademacher_distribution}).
#' Resampled and normal both assume iid, but \sQuote{normal} makes the stronger assumption of normality.
#' \sQuote{wild} does not assume constant variance, but it assumes symmetry.
#' @seealso \code{\link{predict}}, \code{\link[mgcv]{predict.gam}}, \code{\link{simulate}} and \code{\link{simulate_lm}}.
#' @references Generalized Additive Models. An Introduction with R. Second Edition. (2017) Simon N. Wood. CRC Press.
#' @note The purpose of this function is to make it compatible with other functions in this
#' package. It has some limitations compared to the functions in the \sQuote{see also} section.
#' @export
#' @examples
#' \donttest{
#' require(ggplot2)
#' require(mgcv)
#' ## These count data are from GAM book by Simon Wood (pg. 132) - see reference
#' y <- c(12, 14, 33, 50, 67, 74, 123, 141, 165, 204, 253, 246, 240)
#' t <- 1:13
#' dat <- data.frame(y = y, t = t)
#' fit <- gam(y ~ t + I(t^2), family = poisson, data = dat)
#' sims <- simulate_gam(fit, nsim = 100, value = "data.frame")
#'
#' ggplot(data = sims) +
#' geom_line(aes(x = t, y = sim.y, group = ii),
#' color = "gray", alpha = 0.5) +
#' geom_point(aes(x = t, y = y))
#' }
#'
simulate_gam <- function(object, nsim = 1, psim = 1,
resid.type = c("none", "resample", "normal", "wild"),
value = c("matrix", "data.frame"), ...){
if(!inherits(object, "glm")) stop("Object should be of class 'glm' or 'gam'")
resid.type <- match.arg(resid.type)
value <- match.arg(value)
xargs <- list(...)
if(is.null(xargs$newdata)){
newdata <- NULL
}else{
newdata <- xargs$newdata
}
if(is.null(xargs$exclude)){
exclude <- NULL
}else{
exclude <- xargs$exclude
}
if(!is.null(newdata) && psim > 1)
stop("'newdata' is not compatible with 'psim > 1'")
nr <- ifelse(is.null(newdata), stats::nobs(object), nrow(newdata))
ans.mat <- matrix(nrow = nr, ncol = nsim)
if(!is.null(newdata) && value == "data.frame")
stop("'newdata' is not compatible with 'value = data.frame'")
## They say that replicate is faster than this,
## but not when storage is pre-allocated
for(i in 1:nsim){
ans.mat[,i] <- simulate_gam_one(object, psim = psim, resid.type = resid.type,
newdata = newdata, exclude = exclude)
}
if(value == "matrix"){
colnames(ans.mat) <- paste0("sim_",1:nsim)
return(ans.mat)
}else{
dfr <- eval(getCall(object)$data)
if(is.null(dfr)) stop("data argument should be supplied")
## I think this is guaranteed to exist
## but it can be weird
ans.dat <- data.frame(ii = as.factor(rep(1:nsim, each = nrow(dfr))),
dfr,
sim.y = c(ans.mat),
row.names = 1:c(nsim * nr))
return(ans.dat)
}
}
simulate_gam_one <- function(object, psim = 1,
resid.type = c("none", "resample", "normal", "wild"),
newdata = NULL, exclude = NULL){
resid.type <- match.arg(resid.type)
n <- stats::nobs(object)
if(!is.null(newdata)){
## See section 7.2.6 in Wood's GAM book. pg. 339
## Check that names are correct, Is this needed?
if(!identical(names(newdata), attr(object$terms, "term.labels")))
stop("names in 'newdata' do not correspond to 'term.labels'")
X <- predict(object, newdata = newdata, type = "lpmatrix", exclude = exclude)
}else{
X <- predict(object, type = "lpmatrix", exclude = exclude)
}
if(resid.type == "resample"){
rsd0 <- residuals(object, type = "scaled.pearson") ## Are these ~ N(0, 1)?
rsd.sd <- sqrt(object$family$variance(fitted(object)))
rsds <- sample(rsd0, size = n, replace = TRUE) * rsd.sd
}
if(resid.type == "normal"){
rsd.sd <- sqrt(object$family$variance(fitted(object)))
rsds <- rnorm(n = n, sd = rsd.sd)
}
if(resid.type == "wild"){
rsd0 <- residuals(object, type = "response")
rsds <- sample(c(-1, 1), size = n, replace = TRUE) * rsd0
}
## Nothing is simulated, fitted values
if(psim == 0){
betav <- stats::coef(object)
ans <- object$family$linkinv(X %*% betav)
}
## Simulate from coefficient vector only (beta)
if(psim == 1){
betav <- MASS::mvrnorm(mu = coef(object), Sigma = vcov(object))
ans <- object$family$linkinv(X %*% betav)
}
## Simulate using default simulate.lm method
## I think this works because simulate automatically works in the
## response scale
if(psim == 2 && resid.type == "none"){
ans <- simulate(object, nsim = 1)[,1]
}
if(psim == 2 && resid.type != "none"){
betav <- MASS::mvrnorm(mu = coef(object), Sigma = vcov(object))
ans <- object$family$linkinv(X %*% betav) + rsds
## not sure for how many families only a discrete response makes sense
if(object$family$family == "poisson") ans <- round(ans)
}
if(psim == 3){
stop("Not implemented yet")
}
return(ans)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.