#' Specify autoregressive dynamic processes in \pkg{mvgam}
#'
#' Set up autoregressive or autoregressive moving average trend models
#' in \pkg{mvgam}. These functions do not evaluate their arguments –
#' they exist purely to help set up a model with particular autoregressive
#' trend models.
#' @param ma \code{Logical} Include moving average terms of order \code{1}?
#' Default is \code{FALSE}.
#' @param cor \code{Logical} Include correlated process errors as part of a
#' multivariate normal process model? If \code{TRUE} and if \code{n_series > 1}
#' in the supplied data, a fully structured covariance matrix will be estimated
#' for the process errors. Default is \code{FALSE}.
#' @param p A non-negative integer specifying the autoregressive (AR) order.
#' Default is \code{1}. Cannot currently be larger than \code{3} for `AR` terms,
#' and cannot be anything other than `1` for continuous time AR (`CAR`) terms
#' @param gr An optional grouping variable, which must be a `factor` in the supplied `data`,
#' for setting up hierarchical residual correlation structures. If specified,
#' this will automatically set `cor = TRUE` and set up a model where the
#' residual correlations for a specific level of `gr` are modelled hierarchically:
#' \eqn{\Omega_{group} = \alpha_{cor}\Omega_{global} + (1 - \alpha_{cor})\Omega_{group, local}},
#' where \eqn{\Omega_{global}} is a *global* correlation
#' matrix, \eqn{\Omega_{group, local}} is a *local deviation* correlation matrix
#' and \eqn{\alpha_{cor}} is a weighting parameter
#' controlling how strongly the local correlation matrix \eqn{\Omega_{group}}
#' is shrunk towards the global
#' correlation matrix \eqn{\Omega_{global}} (larger values of \eqn{\alpha_{cor}} indicate
#' a greater degree of shrinkage, i.e. a greater degree of partial pooling). When used
#' within a `VAR()` model, this essentially sets up a hierarchical panel vector autoregression
#' where both the autoregressive and correlation matrices are learned hierarchically.
#' If `gr` is supplied then `subgr` *must* also be supplied
#' @param subgr A subgrouping `factor` variable specifying which element in `data` represents the
#' different time series. Defaults to `series`, but note that
#' models that use the hierarchical correlations, where the `subgr` time series are measured in each level of `gr`,
#' *should not* include a `series` element in `data`. Rather, this element will be created internally based
#' on the supplied variables for `gr` and `subgr`. For example, if you are modelling
#' temporal counts for a group of species (labelled as `species` in `data`) across three
#' different geographical regions (labelled as `region`),
#' and you would like the residuals to be correlated within regions,
#' then you should specify `gr = region` and `subgr = species`. Internally, `mvgam()` will create
#' the `series` element for the data using: `series = interaction(group, subgroup, drop = TRUE))`
#' @return An object of class \code{mvgam_trend}, which contains a list of
#' arguments to be interpreted by the parsing functions in \pkg{mvgam}
#' @rdname RW
#' @details Use `vignette("mvgam_overview")` to see the full details of available
#' stochastic trend types in \pkg{mvgam}, or view the rendered version on the package
#' website at: https://nicholasjclark.github.io/mvgam/articles/mvgam_overview.html
#' @author Nicholas J Clark
#'@examples
#'\donttest{
#'# A short example to illustrate CAR(1) models
#'# Function to simulate CAR1 data with seasonality
#'sim_corcar1 = function(n = 125,
#' phi = 0.5,
#' sigma = 2,
#' sigma_obs = 0.75){
#'# Sample irregularly spaced time intervals
#'time_dis <- c(1, runif(n - 1, 0, 5))
#'
#'# Set up the latent dynamic process
#'x <- vector(length = n); x[1] <- -0.3
#'for(i in 2:n){
#' # zero-distances will cause problems in sampling, so mvgam uses a
#' # minimum threshold; this simulation function emulates that process
#' if(time_dis[i] == 0){
#' x[i] <- rnorm(
#' 1, mean = (phi^1e-3) * x[i - 1],
#' sd = sigma * (1 - phi^(2*1e-3)) / (1 - phi^2)
#' )
#' } else {
#' x[i] <- rnorm(
#' 1,
#' mean = (phi^time_dis[i]) * x[i - 1],
#' sd = sigma * (1 - phi^(2*time_dis[i])) / (1 - phi^2)
#' )
#' }
#' }
#'
#'# Add 12-month seasonality
#' cov1 <- sin(2 * pi * (1 : n) / 12); cov2 <- cos(2 * pi * (1 : n) / 12)
#' beta1 <- runif(1, 0.3, 0.7); beta2 <- runif(1, 0.2, 0.5)
#' seasonality <- beta1 * cov1 + beta2 * cov2
#'
#'# Take Gaussian observations with error and return
#' data.frame(y = rnorm(n, mean = x + seasonality, sd = sigma_obs),
#' season = rep(1:12, 20)[1:n],
#' time = cumsum(time_dis))
#'}
#'
#'# Sample two time series
#'dat <- rbind(dplyr::bind_cols(sim_corcar1(phi = 0.65,
#' sigma_obs = 0.55),
#' data.frame(series = 'series1')),
#' dplyr::bind_cols(sim_corcar1(phi = 0.8,
#' sigma_obs = 0.35),
#' data.frame(series = 'series2'))) %>%
#' dplyr::mutate(series = as.factor(series))
#'
#'# mvgam with CAR(1) trends and series-level seasonal smooths; the
#'# State-Space representation (using trend_formula) will be more efficient;
#'# using informative priors on the sigmas often helps with convergence
#'mod <- mvgam(formula = y ~ -1,
#' trend_formula = ~ s(season, bs = 'cc',
#' k = 5, by = trend),
#' trend_model = CAR(),
#' priors = c(prior(exponential(3),
#' class = sigma),
#' prior(beta(4, 4),
#' class = sigma_obs)),
#' data = dat,
#' family = gaussian(),
#' chains = 2,
#' silent = 2)
#'
#'# View usual summaries and plots
#'summary(mod)
#'conditional_effects(mod, type = 'expected')
#'plot(mod, type = 'trend', series = 1)
#'plot(mod, type = 'trend', series = 2)
#'plot(mod, type = 'residuals', series = 1)
#'plot(mod, type = 'residuals', series = 2)
#'mcmc_plot(mod,
#' variable = 'ar1',
#' regex = TRUE,
#' type = 'hist')
#'
#'# Now an example illustrating hierarchical dynamics
#'set.seed(123)
#'# Simulate three species monitored in three different
#'# regions, where dynamics can potentially vary across regions
#'simdat1 <- sim_mvgam(trend_model = VAR(cor = TRUE),
#' prop_trend = 0.95,
#' n_series = 3,
#' mu = c(1, 2, 3))
#'simdat2 <- sim_mvgam(trend_model = VAR(cor = TRUE),
#' prop_trend = 0.95,
#' n_series = 3,
#' mu = c(1, 2, 3))
#'simdat3 <- sim_mvgam(trend_model = VAR(cor = TRUE),
#' prop_trend = 0.95,
#' n_series = 3,
#' mu = c(1, 2, 3))
#'
#'# Set up the data but DO NOT include 'series'
#'all_dat <- rbind(simdat1$data_train %>%
#' dplyr::mutate(region = 'qld'),
#' simdat2$data_train %>%
#' dplyr::mutate(region = 'nsw'),
#' simdat3$data_train %>%
#' dplyr::mutate(region = 'vic')) %>%
#' dplyr::mutate(species = gsub('series', 'species', series),
#' species = as.factor(species),
#' region = as.factor(region)) %>%
#' dplyr::arrange(series, time) %>%
#' dplyr::select(-series)
#'
#'# Check priors for a hierarchical AR1 model
#'get_mvgam_priors(formula = y ~ species,
#' trend_model = AR(gr = region, subgr = species),
#' data = all_dat)
#'
#'# Fit the model
#'mod <- mvgam(formula = y ~ species,
#' trend_model = AR(gr = region, subgr = species),
#' data = all_dat,
#' chains = 2,
#' silent = 2)
#'
#'# Check standard outputs
#'summary(mod)
#'conditional_effects(mod, type = 'link')
#'
#'# Inspect posterior estimates for the correlation weighting parameter
#'mcmc_plot(mod, variable = 'alpha_cor', type = 'hist')
#'}
#' @export
RW = function(ma = FALSE, cor = FALSE, gr = NA, subgr = NA) {
# Validate the supplied groupings and correlation argument
gr <- deparse0(substitute(gr))
subgr <- deparse0(substitute(subgr))
if (gr == 'NA') {
subgr <- 'series'
}
if (gr != 'NA') {
if (subgr == 'NA') {
stop(
'argument "subgr" must be supplied if "gr" is also supplied',
call. = FALSE
)
} else if (subgr == 'series') {
stop(
'argument "subgr" cannot be set to "series" if "gr" is also supplied',
call. = FALSE
)
} else {
cor <- TRUE
}
}
out <- structure(
list(
trend_model = 'RW',
ma = ma,
cor = cor,
unit = 'time',
gr = gr,
subgr = subgr,
label = match.call()
),
class = 'mvgam_trend'
)
}
#' @rdname RW
#' @export
AR = function(p = 1, ma = FALSE, cor = FALSE, gr = NA, subgr = NA) {
validate_pos_integer(p)
if (p > 3) {
stop("Argument 'p' must be <= 3", call. = FALSE)
}
# Validate the supplied groupings and correlation argument
gr <- deparse0(substitute(gr))
subgr <- deparse0(substitute(subgr))
if (gr == 'NA') {
subgr <- 'series'
}
if (gr != 'NA') {
if (subgr == 'NA') {
stop(
'argument "subgr" must be supplied if "gr" is also supplied',
call. = FALSE
)
} else if (subgr == 'series') {
stop(
'argument "subgr" cannot be set to "series" if "gr" is also supplied',
call. = FALSE
)
} else {
cor <- TRUE
}
}
out <- structure(
list(
trend_model = paste0('AR', p),
ma = ma,
cor = cor,
unit = 'time',
gr = gr,
subgr = subgr,
label = match.call()
),
class = 'mvgam_trend'
)
}
#' @rdname RW
#' @export
CAR = function(p = 1) {
validate_pos_integer(p)
if (p > 1) {
stop("Argument 'p' must be = 1", call. = FALSE)
}
out <- structure(
list(
trend_model = paste0('CAR', p),
ma = FALSE,
cor = FALSE,
unit = 'time',
gr = 'NA',
subgr = 'series',
label = match.call()
),
class = 'mvgam_trend'
)
}
#' @rdname RW
#' @export
VAR = function(ma = FALSE, cor = FALSE, gr = NA, subgr = NA) {
# Validate the supplied groupings and correlation argument
gr <- deparse0(substitute(gr))
subgr <- deparse0(substitute(subgr))
if (gr == 'NA') {
subgr <- 'series'
}
if (gr != 'NA') {
if (subgr == 'NA') {
stop(
'argument "subgr" must be supplied if "gr" is also supplied',
call. = FALSE
)
} else if (subgr == 'series') {
stop(
'argument "subgr" cannot be set to "series" if "gr" is also supplied',
call. = FALSE
)
} else {
cor <- TRUE
}
}
out <- structure(
list(
trend_model = 'VAR',
ma = ma,
cor = cor,
unit = 'time',
gr = gr,
subgr = subgr,
label = match.call()
),
class = 'mvgam_trend'
)
}
#' Specify dynamic Gaussian process trends in \pkg{mvgam} models
#'
#' Set up low-rank approximate Gaussian Process trend models using Hilbert
#' basis expansions in \pkg{mvgam}. This function does not evaluate its arguments –
#' it exists purely to help set up a model with particular GP
#' trend models.
#' @param ... unused
#' @return An object of class \code{mvgam_trend}, which contains a list of
#' arguments to be interpreted by the parsing functions in \pkg{mvgam}
#' @details A GP trend is estimated for each series using Hilbert space
#' approximate Gaussian Processes.
#' In `mvgam`, latent squared exponential GP trends are approximated using by
#' default \code{20} basis functions and using a multiplicative factor of `c = 5/4`,
#' which saves computational costs compared to fitting full GPs while adequately estimating
#' GP \code{alpha} and \code{rho} parameters.
#' @rdname GP
#' @author Nicholas J Clark
#' @references Riutort-Mayol G, Burkner PC, Andersen MR, Solin A and Vehtari A (2023).
#' Practical Hilbert space approximate Bayesian Gaussian processes for probabilistic
#' programming. Statistics and Computing 33, 1. https://doi.org/10.1007/s11222-022-10167-2
#' @seealso \code{\link[brms]{gp}}
#' @export
GP = function(...) {
out <- structure(
list(
trend_model = 'GP',
ma = FALSE,
cor = FALSE,
unit = 'time',
gr = 'NA',
subgr = 'series',
label = match.call()
),
class = 'mvgam_trend'
)
}
#' Specify piecewise linear or logistic trends in \pkg{mvgam} models
#'
#' Set up piecewise linear or logistic trend models
#' in \code{mvgam}. These functions do not evaluate their arguments –
#' they exist purely to help set up a model with particular piecewise
#' trend models.
#' @param n_changepoints A non-negative integer specifying the number of potential
#' changepoints. Potential changepoints are selected uniformly from the
#' first `changepoint_range` proportion of timepoints in \code{data}. Default is `10`
#' @param changepoint_range Proportion of history in \code{data} in which trend changepoints
#' will be estimated. Defaults to 0.8 for the first 80%.
#' @param changepoint_scale Parameter modulating the flexibility of the
#' automatic changepoint selection by altering the scale parameter of a Laplace distribution.
#' The resulting prior will be `double_exponential(0, changepoint_scale)`.
#' Large values will allow many changepoints and a more flexible trend, while
#' small values will allow few changepoints. Default is `0.05`.
#' @param growth Character string specifying either 'linear' or 'logistic' growth of
#' the trend. If 'logistic', a variable labelled `cap` MUST be in \code{data} to specify the
#' maximum saturation point for the trend (see details and examples in \code{\link{mvgam}} for
#' more information).
#' Default is 'linear'.
#' @author Nicholas J Clark
#' @references Taylor, Sean J., and Benjamin Letham. "Forecasting at scale." The American Statistician 72.1 (2018): 37-45.
#' @return An object of class \code{mvgam_trend}, which contains a list of
#' arguments to be interpreted by the parsing functions in \code{mvgam}
#' @details
#'*Offsets and intercepts*:
#' For each of these trend models, an offset parameter is included in the trend
#' estimation process. This parameter will be incredibly difficult to identify
#' if you also include an intercept in the observation formula. For that reason,
#' it is highly recommended that you drop the intercept from the formula
#' (i.e. `y ~ x + 0` or `y ~ x - 1`, where `x` are your optional predictor terms).
#'\cr
#'\cr
#'*Logistic growth and the cap variable*:
#' When forecasting growth, there is often some maximum achievable point that
#' a time series can reach. For example, total market size, total population size
#' or carrying capacity in population dynamics. It can be advantageous for the forecast
#' to saturate at or near this point so that predictions are more sensible.
#' This function allows you to make forecasts using a logistic growth trend model,
#' with a specified carrying capacity. Note that this capacity does not need to be static
#' over time, it can vary with each series x timepoint combination if necessary. But you
#' must supply a `cap` value for each observation in the data when using `growth = 'logistic'`.
#' For observation families that use a non-identity link function, the `cap` value will
#' be internally transformed to the link scale (i.e. your specified `cap` will be log
#' transformed if you are using a `poisson()` or `nb()` family). It is therefore important
#' that you specify the `cap` values on the scale of your outcome. Note also that
#' no missing values are allowed in `cap`.
#'
#' @rdname piecewise_trends
#' @examples
#' \donttest{
#' # Example of logistic growth with possible changepoints
#' # Simple logistic growth model
#' dNt = function(r, N, k){
#' r * N * (k - N)
#' }
#'
#' # Iterate growth through time
#' Nt = function(r, N, t, k) {
#' for (i in 1:(t - 1)) {
#'
#' # population at next time step is current population + growth,
#' # but we introduce several 'shocks' as changepoints
#' if(i %in% c(5, 15, 25, 41, 45, 60, 80)){
#' N[i + 1] <- max(1, N[i] + dNt(r + runif(1, -0.1, 0.1),
#' N[i], k))
#' } else {
#' N[i + 1] <- max(1, N[i] + dNt(r, N[i], k))
#' }
#' }
#' N
#' }
#'
#' # Simulate expected values
#' set.seed(11)
#' expected <- Nt(0.004, 2, 100, 30)
#' plot(expected, xlab = 'Time')
#'
#' # Take Poisson draws
#' y <- rpois(100, expected)
#' plot(y, xlab = 'Time')
#'
#' # Assemble data into dataframe and model. We set a
#' # fixed carrying capacity of 35 for this example, but note that
#' # this value is not required to be fixed at each timepoint
#' mod_data <- data.frame(y = y,
#' time = 1:100,
#' cap = 35,
#' series = as.factor('series_1'))
#' plot_mvgam_series(data = mod_data)
#'
#' # The intercept is nonidentifiable when using piecewise
#' # trends because the trend functions have their own offset
#' # parameters 'm'; it is recommended to always drop intercepts
#' # when using these trend models
#' mod <- mvgam(y ~ 0,
#' trend_model = PW(growth = 'logistic'),
#' family = poisson(),
#' data = mod_data,
#' chains = 2,
#' silent = 2)
#' summary(mod)
#'
#' # Plot the posterior hindcast
#' hc <- hindcast(mod)
#' plot(hc)
#'
#' # View the changepoints with ggplot2 utilities
#' library(ggplot2)
#' mcmc_plot(mod, variable = 'delta_trend',
#' regex = TRUE) +
#' scale_y_discrete(labels = mod$trend_model$changepoints) +
#' labs(y = 'Potential changepoint',
#' x = 'Rate change')
#'
#' # Generate a methods description scaffold
#' how_to_cite(mod)
#' }
#' @export
PW = function(
n_changepoints = 10,
changepoint_range = 0.8,
changepoint_scale = 0.05,
growth = 'linear'
) {
growth <- match.arg(growth, choices = c('linear', 'logistic'))
validate_proportional(changepoint_range)
validate_pos_integer(n_changepoints)
validate_pos_real(changepoint_scale)
trend_model <- 'PWlinear'
if (growth == 'logistic') {
trend_model = 'PWlogistic'
}
out <- structure(
list(
trend_model = trend_model,
n_changepoints = n_changepoints,
changepoint_range = changepoint_range,
changepoint_scale = changepoint_scale,
ma = FALSE,
cor = FALSE,
unit = 'time',
gr = 'NA',
subgr = 'series',
label = match.call()
),
class = 'mvgam_trend'
)
}
#' Specify correlated residual processes in \pkg{mvgam}
#'
#' Set up latent correlated multivariate Gaussian residual processes
#' in \pkg{mvgam}. This function does not evaluate it's arguments –
#' it exists purely to help set up a model with particular error processes.
#' @param unit The unquoted name of the variable that represents the unit of analysis in `data` over
#' which latent residuals should be correlated. This variable should be either a
#' `numeric` or `integer` variable in the supplied `data`.
#' Defaults to `time` to be consistent with other functionalities
#' in \pkg{mvgam}, though note that the data need not be time series in this case. See examples below
#' for further details and explanations
#' @param gr An optional grouping variable, which must be a `factor` in the supplied `data`,
#' for setting up hierarchical residual correlation structures. If specified,
#' this will automatically set up a model where the
#' residual correlations for a specific level of `gr` are modelled hierarchically:
#' \eqn{\Omega_{group} = p\Omega_{global} + (1 - p)\Omega_{group, local}},
#' where \eqn{\Omega_{global}} is a *global* correlation
#' matrix, \eqn{\Omega_{group, local}} is a *local deviation* correlation matrix
#' and \eqn{p} is a weighting parameter
#' controlling how strongly the local correlation matrix \eqn{\Omega_{group}} is shrunk towards the global
#' correlation matrix \eqn{\Omega_{global}}. If `gr` is supplied then `subgr` *must* also be supplied
#' @param subgr A subgrouping `factor` variable specifying which element in `data` represents the
#' different observational units. Defaults to `series` to be consistent with other functionalities
#' in \pkg{mvgam}, though note that the data need not be time series in this case.
#' But note that models that use the hierarchical correlations (by supplying a value for `gr`)
#' *should not* include a `series` element in `data`. Rather, this element will be created internally based
#' on the supplied variables for `gr` and `subgr`. For example, if you are modelling
#' counts for a group of species (labelled as `species` in the data) across sampling sites
#' (labelled as `site` in the data) in three
#' different geographical regions (labelled as `region`), and you would like the residuals to be correlated
#' within regions, then you should specify
#' `unit = site`, `gr = region`, and `subgr = species`. Internally, `mvgam()` will appropriately order
#' the data by `unit` (in this case, by `site`) and create
#' the `series` element for the data using something like: `series = as.factor(paste0(group, '_', subgroup))`
#' @return An object of class \code{mvgam_trend}, which contains a list of
#' arguments to be interpreted by the parsing functions in \pkg{mvgam}
#' @examples
#'\donttest{
#'# Simulate counts of four species over ten sampling locations
#'site_dat <- data.frame(site = rep(1:10, 4),
#' species = as.factor(sort(rep(letters[1:4], 10))),
#' y = c(NA, rpois(39, 3)))
#'head(site_dat)
#'
#'# Set up a correlated residual (i.e. Joint Species Distribution) model,
#'# where 'site' represents the unit of analysis
#'trend_model <- ZMVN(unit = site, subgr = species)
#'mod <- mvgam(y ~ species,
#' trend_model = ZMVN(unit = site,
#' subgr = species),
#' data = site_dat,
#' chains = 2,
#' silent = 2)
#'
#'# Inspect the estimated species-species residual covariances
#'mcmc_plot(mod, variable = 'Sigma', regex = TRUE, type = 'hist')
#'
#'# A hierarchical correlation example; set up correlated counts
#'# for three species across two sampling locations
#'Sigma <- matrix(c(1, -0.4, 0.5,
#' -0.4, 1, 0.3,
#' 0.5, 0.3, 1),
#' byrow = TRUE,
#' nrow = 3)
#'Sigma
#'
#'make_site_dat = function(...){
#' errors <- mgcv::rmvn(n = 30,
#' mu = c(0.6, 0.8, 1.8),
#' V = Sigma)
#' site_dat <- do.call(rbind, lapply(1:3, function(spec){
#' data.frame(y = rpois(30,
#' lambda = exp(errors[, spec])),
#' species = paste0('species',
#' spec),
#' site = 1:30)
#' }))
#' site_dat
#'}
#'
#'site_dat <- rbind(make_site_dat() %>%
#' dplyr::mutate(group = 'group1'),
#' make_site_dat() %>%
#' dplyr::mutate(group = 'group2')) %>%
#' dplyr::mutate(species = as.factor(species),
#' group = as.factor(group))
#'
#'# Fit the hierarchical correlated residual model
#'mod <- mvgam(y ~ species,
#' trend_model = ZMVN(unit = site,
#' gr = group,
#' subgr = species),
#' data = site_dat)
#'
#'# Inspect the estimated species-species residual covariances
#'mcmc_plot(mod, variable = 'Sigma', regex = TRUE, type = 'hist')
#'}
#'
#' @export
ZMVN = function(unit = time, gr = NA, subgr = series) {
# Validate the supplied groupings and correlation argument
unit <- deparse0(substitute(unit))
gr <- deparse0(substitute(gr))
subgr <- deparse0(substitute(subgr))
if (subgr == 'NA') {
stop('argument "subgr" cannot be NA', call. = FALSE)
}
if (unit == 'NA') {
stop('argument "unit" cannot be NA', call. = FALSE)
}
out <- structure(
list(
trend_model = 'ZMVN',
ma = FALSE,
cor = TRUE,
unit = unit,
gr = gr,
subgr = subgr,
label = match.call()
),
class = 'mvgam_trend'
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.