#' Poisson lognormal model
#'
#' Fit the multivariate Poisson lognormal model with a variational algorithm. Use the (g)lm syntax for model specification (covariates, offsets, weights).
#'
#' @param formula an object of class "formula": a symbolic description of the model to be fitted.
#' @param data an optional data frame, list or environment (or object coercible by as.data.frame to a data frame) containing the variables in the model. If not found in data, the variables are taken from environment(formula), typically the environment from which PLN is called.
#' @param subset an optional vector specifying a subset of observations to be used in the fitting process.
#' @param weights an optional vector of observation weights to be used in the fitting process.
#' @param control a list-like structure for controlling the optimization, with default generated by [PLN_param()]. See the associated documentation
#' for details.
#'
#' @return an R6 object with class [`PLNfit`]
#'
#' @rdname PLN
#' @include PLNfit-class.R
#' @examples
#' data(trichoptera)
#' trichoptera <- prepare_data(trichoptera$Abundance, trichoptera$Covariate)
#' myPLN <- PLN(Abundance ~ 1, data = trichoptera)
#' @seealso The class [`PLNfit`] and the configuration function [PLN_param()]
#' @importFrom stats model.frame model.matrix model.response model.offset model.weights terms
#' @export
PLN <- function(formula, data, subset, weights, control = PLN_param()) {
## Temporary test for deprecated use of list()
if (!inherits(control, "PLNmodels_param"))
stop("We now use the function PLN_param() to generate the list of parameters that controls the fit:
replace 'list(my_arg = xx)' by PLN_param(my_arg = xx) and see the documentation of PLN_param().")
## extract the data matrices and weights
args <- extract_model(match.call(expand.dots = FALSE), parent.frame())
## initialization
if (control$trace > 0) cat("\n Initialization...")
myPLN <- switch(control$covariance,
"diagonal" = PLNfit_diagonal$new(args$Y, args$X, args$O, args$w, args$formula, control),
"spherical" = PLNfit_spherical$new(args$Y, args$X, args$O, args$w, args$formula, control),
"fixed" = PLNfit_fixedcov$new(args$Y, args$X, args$O, args$w, args$formula, control),
# "genet" = PLNfit_$new(args$Y, args$X, args$O, args$w, args$formula, control),
PLNfit$new(args$Y, args$X, args$O, args$w, args$formula, control)) # default: full covariance
## optimization
if (control$trace > 0) cat("\n Adjusting a", myPLN$vcov_model, "covariance PLN model with", control$backend, "optimizer")
myPLN$optimize(args$Y, args$X, args$O, args$w, control$config_optim)
## post-treatment
if (control$trace > 0) cat("\n Post-treatments...")
myPLN$postTreatment(args$Y, args$X, args$O, args$w, control$config_post, control$config_optim)
if (control$trace > 0) cat("\n DONE!\n")
myPLN
}
#' Control of a PLN fit
#'
#' Helper to define list of parameters to control the PLN fit. All arguments have defaults.
#'
#' @param backend optimization back used, either "nlopt" or "torch". Default is "nlopt"
#' @param covariance character setting the model for the covariance matrix. Either "full", "diagonal", "spherical" or "fixed". Default is "full".
#' @param Omega precision matrix of the latent variables. Inverse of Sigma. Must be specified if `covariance` is "fixed"
#' @param config_optim a list for controlling the optimizer (either "nlopt" or "torch" backend). See details
#' @param config_post a list for controlling the post-treatments (optional bootstrap, jackknife, R2, etc.). See details
#' @param trace a integer for verbosity.
#' @param inception Set up the parameters initialization: by default, the model is initialized with a multivariate linear model applied on
#' log-transformed data, and with the same formula as the one provided by the user. However, the user can provide a PLNfit (typically obtained from a previous fit),
#' which sometimes speeds up the inference.
#'
#' @return list of parameters configuring the fit.
#'
#' @details The list of parameters `config_optim` controls the optimizers. When "nlopt" is chosen the following entries are relevant
#' * "algorithm" the optimization method used by NLOPT among LD type, e.g. "CCSAQ", "MMA", "LBFGS". See NLOPT documentation for further details. Default is "CCSAQ".
#' * "maxeval" stop when the number of iteration exceeds maxeval. Default is 10000
#' * "ftol_rel" stop when an optimization step changes the objective function by less than ftol multiplied by the absolute value of the parameter. Default is 1e-8
#' * "xtol_rel" stop when an optimization step changes every parameters by less than xtol multiplied by the absolute value of the parameter. Default is 1e-6
#' * "ftol_abs" stop when an optimization step changes the objective function by less than ftol_abs. Default is 0.0 (disabled)
#' * "xtol_abs" stop when an optimization step changes every parameters by less than xtol_abs. Default is 0.0 (disabled)
#' * "maxtime" stop when the optimization time (in seconds) exceeds maxtime. Default is -1 (disabled)
#'
#' When "torch" backend is used (only for PLN and PLNLDA for now), the following entries are relevant:
#' * "algorithm" the optimizer used by torch among RPROP (default), RMSPROP, ADAM and ADAGRAD
#' * "maxeval" stop when the number of iteration exceeds maxeval. Default is 10 000
#' * "numepoch" stop training once this number of epochs exceeds numepoch. Set to -1 to enable infinite training. Default is 1 000
#' * "num_batch" number of batches to use during training. Defaults to 1 (use full dataset at each epoch)
#' * "ftol_rel" stop when an optimization step changes the objective function by less than ftol multiplied by the absolute value of the parameter. Default is 1e-8
#' * "xtol_rel" stop when an optimization step changes every parameters by less than xtol multiplied by the absolute value of the parameter. Default is 1e-6
#' * "lr" learning rate. Default is 0.1.
#' * "momentum" momentum factor. Default is 0 (no momentum). Only used in RMSPROP
#' * "weight_decay" Weight decay penalty. Default is 0 (no decay). Not used in RPROP
#' * "step_sizes" pair of minimal (default: 1e-6) and maximal (default: 50) allowed step sizes. Only used in RPROP
#' * "etas" pair of multiplicative increase and decrease factors. Default is (0.5, 1.2). Only used in RPROP
#' * "centered" if TRUE, compute the centered RMSProp where the gradient is normalized by an estimation of its variance weight_decay (L2 penalty). Default to FALSE. Only used in RMSPROP
#'
#' The list of parameters `config_post` controls the post-treatment processing (for most `PLN*()` functions), with the following entries (defaults may vary depending on the specific function, check `config_post_default_*` for defaults values):
#' * jackknife boolean indicating whether jackknife should be performed to evaluate bias and variance of the model parameters. Default is FALSE.
#' * bootstrap integer indicating the number of bootstrap resamples generated to evaluate the variance of the model parameters. Default is 0 (inactivated).
#' * variational_var boolean indicating whether variational Fisher information matrix should be computed to estimate the variance of the model parameters (highly underestimated). Default is FALSE.
#' * sandwich_var boolean indicating whether sandwich estimation should be used to estimate the variance of the model parameters (highly underestimated). Default is FALSE.
#' * rsquared boolean indicating whether approximation of R2 based on deviance should be computed. Default is TRUE
#'
#' @export
PLN_param <- function(
backend = c("nlopt", "torch"),
trace = 1,
covariance = c("full", "diagonal", "spherical", "fixed"),
Omega = NULL,
config_post = list(),
config_optim = list(),
inception = NULL # pretrained PLNfit used as initialization
) {
covariance <- match.arg(covariance)
if (covariance == "fixed") stopifnot(inherits(Omega, "matrix") | inherits(Omega, "Matrix"))
if (!is.null(inception)) stopifnot(isPLNfit(inception))
## post-treatment config
config_pst <- config_post_default_PLN
config_pst[names(config_post)] <- config_post
config_pst$trace <- trace
## optimization config
backend <- match.arg(backend)
stopifnot(backend %in% c("nlopt", "torch"))
if (backend == "nlopt") {
stopifnot(config_optim$algorithm %in% available_algorithms_nlopt)
config_opt <- config_default_nlopt
}
if (backend == "torch") {
stopifnot(config_optim$algorithm %in% available_algorithms_torch)
config_opt <- config_default_torch
}
config_opt[names(config_optim)] <- config_optim
config_opt$trace <- trace
structure(list(
backend = backend ,
trace = trace ,
covariance = covariance,
Omega = Omega ,
config_post = config_pst,
config_optim = config_opt,
inception = inception), class = "PLNmodels_param")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.