#' BCNSM Fit for Multivariate Positive Data
#'
#' Fit a multivariate Box-Cox symmetric distribution generated by an NSM copula via maximum
#' likelihood.
#'
#' @param y a matrix with the sample observations.
#' @param association one of \code{"unstructured"} (default), \code{"uniform"}, or
#' \code{"nonassociative"}, which specify the association matrix of the model.
#' @param copula character; informs which normal scale mixture distribution should be used to generate
#' the NSM copula. Currently, the copulas available are: Gaussian (\code{"gaussian"}),
#' Student's \emph{t} (\code{"t"}), slash (\code{"slash"}), and hyperbolic (\code{"hyp"}).
#' @param delta possible extra parameter associated with the mixing distribution of the copula.
#' For example, the degrees of freedom of the \emph{t} copula, or the heavy tailness parameter of the
#' slash or the hyperbolic copula. To estimate the value of \code{delta} via profile log-likelihood,
#' use the function \code{\link{choose_copula}}.
#' @param margins a character or a character vector; specifies the marginal BCS distributions.
#' If all BCS margins are the same, it is sufficient to enter only one character. A table with
#' the current available BCS distributions can be seen in \code{\link{bcs}}.
#' @param control a list of control arguments specified via \code{\link{control_fit}}.
#' @param ... further arguments passed to \code{\link{control_fit}}.
#'
#' @return The \code{bcnsm_fit} returns an object of class \code{"bcnsm"},
#' which consists of a list with the following components:
#' \describe{
#' \item{mu, sigma, lambda, nu}{vectors with the estimated values for \code{mu}, \code{sigma},
#' \code{lambda}, and \code{nu}, respectively.}
#' \item{gamma}{the estimated parameters of the association matrix, if any.}
#' \item{margins}{a character vector with the marginal BCS distributions of the fit.}
#' \item{association}{structure of the association matrix. It can be one of \code{"non-associative"},
#' \code{"unstructured"}, or \code{"uniform"}.}
#' \item{copula, delta}{\code{"copula"} is a character which informs which normal scale mixture distribution
#' was used to generate the NSM copula and \code{"delta"} is the possible extra parameter associated with
#' the copula.}
#' \item{logLik}{log-likelihood of the fitted model.}
#' \item{vcov}{asymptotic covariance matrix of the maximum likelihood estimator of the model parameters vector.
#' Specifically, the inverse of the observed information matrix, obtained via numeric Hessian matrix.}
#' \item{y}{the response matrix.}
#' \item{optim_params}{control optimization parameters used by \code{\link{control_fit}}.}
#' \item{nobs,d}{the number of observations in the sample and the dimension of the response variable, respectively.}
#' \item{call}{ the function call.}
#' }
#'
#' @references Vanegas, L. H., and Paula, G. A. (2016). Log-symmetric distributions: statistical properties and
#' parameter estimation. \emph{Brazilian Journal of Probability and Statistics}, 30, 196-220.
#'
#' Ferrari, S. L. P., and Fumes, G. (2017). Box-Cox symmetric distributions and applications to
#' nutritional data. \emph{AStA Advances in Statistical Analysis}, 101, 321-344.
#'
#' Medeiros, R. M. R. de, and Ferrari, S. L. P. (2023). Multivariate Box-Cox symmetric distributions
#' generated by a normal scale mixture copula.
#'
#' @author Rodrigo M. R. de Medeiros <\email{rodrigo.matheus@live.com}>
#'
#' @export
#'
#' @examples
#' \dontrun{
#' ## Random sampling
#'
#' ### Sample size and dimension
#' n <- 200
#' d <- 4
#'
#' ### Association matrix
#' Gamma <- matrix(0.8, d, d)
#' diag(Gamma) <- 1
#'
#' ### Marginal specifications
#'
#' margins <- c("bcno", "lt", "bct", "lno")
#'
#' mu <- c(19, 20, 15, 20)
#' sigma <- c(0.2, 0.3, 0.4, 0.3)
#' lambda <- c(-1, NA, 1.6, NA)
#' nu <- c(NA, 4, 8, NA)
#'
#' ### Copula
#' copula <- "t"
#' delta <- 4
#'
#' ### Observations
#' set.seed(123) # Seed for reproducibility
#' y <- rbcnsm(n, mu, sigma, lambda, nu, Gamma, copula, delta, margins)
#' colnames(y) <- c("y1", "y2", "y3", "y4")
#'
#' ### Visualization (based on graphics::pairs functions)
#' mvplot(y)
#'
#' ## Fit with Gaussian copula and uniform structure
#' fit <- bcnsm_fit(y, association = "uniform", copula = "gaussian",
#' margins = c("bcno", "lt", "bct", "lno"))
#'
#' class(fit)
#' methods(class = "bcnsm")
#'
#' # Fit summaries
#' fit
#' summary(fit)
#'
#' # Fit visualization
#'
#' ## Bivariate fit
#' plot(fit)
#'
#' ## Marginal fit
#' plot(fit, type = "margins")
#'
#' ## Transformed vectors
#' plot(fit, "epsilon")
#'
#' # Choose the value of the extra parameter of the t copula (it can be slow)
#' fit_t <- choose_copula(fit, grid = 1:8, copula = "t")
#'
#' ## Final fit
#' final_fit <- fit_t[[4]]
#'
#' final_fit
#' plot(final_fit)
#' plot(final_fit, type = "margins")
#' }
bcnsm_fit <- function(y, association = c("unstructured", "uniform", "nonassociative"),
copula = c("gaussian", "t", "slash", "hyp"),
delta = NULL, margins = "bcno",
control = control_fit(...), ...) {
cl <- match.call()
# Control parameters
## Specific optim parameters
method <- control$method
maxit <- control$maxit
hessian <- control$hessian
## Initial values
start <- control$start
mu_inits <- control$mu_inits
sigma_inits <- control$sigma_inits
lambda_inits <- control$lambda_inits
nu_inits <- control$nu_inits
gamma_inits <- control$gamma_inits
start_id <- all(!is.null(mu_inits), !is.null(sigma_inits), !is.null(lambda_inits),
!is.null(nu_inits), !is.null(gamma_inits))
# Estimation settings
## Response
if (is.vector(y))
y <- matrix(y, ncol = length(y))
if (is.data.frame(y))
y <- as.matrix(y)
n <- nrow(y)
d <- ncol(y)
### Association matrix
association <- match.arg(association, c("unstructured", "uniform", "nonassociative"))
association <- get(association)(d)
## Margins
margins <- as.vector(matrix(margins, 1, d))
## Important identifies
Gamma_id <- !is.logical(association)
lambda_id <- apply(matrix(margins, ncol = 1), 1, function(x) !grepl("Log-", as.bcs(x)$name))
nu_id <- apply(matrix(margins, ncol = 1), 1, function(x) as.bcs(x)$extrap)
## Cleaning the arguments that will be passed to the optim function
control$method <- control$hessian <- control$start <- control$mu_inits <- control$sigma_inits <-
control$lambda_inits <- control$nu_inits <- control$gamma_inits <- NULL
### Copula
copula <- match.arg(copula, c("gaussian", "t", "slash", "hyp"))
## Univariate quantile function of the corresponding NSM distribution
if (copula == "slash") {
Wsl <- distr::AbscontDistribution(
d = function(x) dslash(x, nu = delta),
Symmetry = distr::SphericalSymmetry(0)
)
dPSI <- function(x, log = FALSE) dslash(x, nu = delta, log = log)
qPSI <- function(p) distr::q(Wsl)(p)
dgf <- function(u, d, log = FALSE){
id1 <- which(u == 0)
id2 <- which(u != 0)
out <- vector("numeric", length(u))
out[id1] <- log(2 * delta) - (d/2) * log(2 * pi) - log(2 * delta + d)
out[id2] <- log(delta) + delta * log(2) + log(ig(delta + d / 2, u[id2] / 2)) -
(d/2) * log(pi) - (delta + d/2) * log(u[id2])
if (!log) out <- exp(out)
out
}
} else if (copula == "gaussian") {
dPSI <- function(x, log = FALSE) stats::dnorm(x, log = log)
qPSI <- function(p) stats::qnorm(p)
dgf <- function(u, d, log = FALSE){
out <- -0.5 * u - (d/2) * log(2 * pi)
if (!log) out <- exp(out)
out
}
} else if (copula == "t") {
dPSI <- function(x, log = FALSE) stats::dt(x, delta, log = log)
qPSI <- function(p) stats::qt(p, delta)
dgf <- function(u, d, log = FALSE){
out <- lgamma(0.5 * (delta + d)) - 0.5 * (delta + d) * log(1 + u / delta) -
lgamma(delta / 2) - (d/2) * log(delta * pi)
if (!log) out <- exp(out)
out
}
} else {
Whp <- distr::AbscontDistribution(
d = function(x) dhyp(x, nu = delta),
Symmetry = distr::SphericalSymmetry(0)
)
dPSI <- function(x, log = FALSE) dhyp(x, nu = delta, log = log)
qPSI <- function(p) distr::q(Whp)(p)
dgf <- function(u, d, log = FALSE){
out <- (d - 1) * log(delta) + log(besselK(delta * sqrt(1 + u), nu = 1 - d/2)) -
(d/2) * log(2 * pi) - log(besselK(delta, nu = 1)) -
(d/2 - 1) * log(delta * sqrt(1 + u))
if (!log) out <- exp(out)
out
}
}
## Association matrix
if (!Gamma_id) {
association <- list(npar = 0L)
Gamma <- sin(pi * stats::cor(y, method = "kendall") / 2)
}
# Parameter indexation -------------------------------------------------------
par_id <- list(
mu = 1:d,
sigma = 1:d + d,
lambda = if (any(lambda_id)) 1:sum(lambda_id) + 2 * d else NULL,
nu = if (any(nu_id)) 1:sum(nu_id) + sum(lambda_id) + 2 * d else NULL,
gamma = if (association$npar > 0) 1:association$npar + sum(nu_id) + sum(lambda_id) + 2 * d else NULL
)
## Initial values ------------------------------------------------------------
if (!start_id) {
if (is.null(gamma_inits) & Gamma_id) gamma_inits <- association$start(y)
mu0 <- sigma0 <- lambda0 <- nu0 <- rep(NA, d)
for (j in 1:d) {
marg_inits <- get(margins[j])(y[, j])$start(y[, j])
mu0[j] <- if (is.null(mu_inits)) marg_inits[1] else mu_inits[j]
sigma0[j] <- if (is.null(sigma_inits)) marg_inits[2] else sigma_inits[j]
lambda0[j] <- if (lambda_id[j] & is.null(lambda_inits)) marg_inits[3] else if (lambda_id[j] & !is.null(lambda_inits)) lambda_inits[j] else NA
if (nu_id[j]) nu0[j] <- utils::tail(marg_inits, 1)
}
inits <- c(mu0, sigma0, lambda0[lambda_id], nu0[nu_id], gamma_inits)
} else {
inits <- start
}
## Log-likelihood ------------------------------------------------------------
ll <- function(theta) {
## Parameter setting
mu <- theta[par_id$mu]
sigma <- theta[par_id$sigma]
lambda <- rep(NA, d)
lambda[lambda_id] <- theta[par_id$lambda]
nu <- rep(NA, d)
nu[nu_id] <- theta[par_id$nu]
gamma <- theta[par_id$gamma]
## Parameter space for the association parameters identifier
gamma_id <- FALSE
Gamma <- association$Gamma(gamma)
if (Gamma_id) {
dec <- tryCatch(chol(Gamma), error = function(e) e)
if (inherits(dec, "error")) dec <- NULL
if (length(gamma) > 0L) gamma_id <- any(gamma < association$lower | gamma > association$upper)
}
## Out
if (any(!is.finite(mu)) | any(!is.finite(sigma)) | any(!is.finite(lambda[lambda_id])) |
any(mu < 0) | any(sigma < 0) | any(nu[nu_id] < 0) | gamma_id | is.null(dec)) {
-Inf
} else {
epsilon <- log_f <- matrix(NA, n, d)
EPS <- .Machine$double.eps^(1 / 2)
for (j in 1:d) {
epsilon[, j] <- qPSI(pmin(pmax(get(paste0("p", margins[j]))(q = y[, j], mu = mu[j], sigma = sigma[j],
lambda = lambda[j], nu = nu[j]), EPS), 1 - EPS))
log_f[, j] <- get(paste0("d", margins[j]))(x = y[, j], mu = mu[j], sigma = sigma[j],
lambda = lambda[j], nu = nu[j], log = TRUE)
}
tmp <- backsolve(dec, t(epsilon), transpose = TRUE)
rss <- colSums(tmp^2)
-n * sum(log(diag(dec))) + sum(dgf(rss, d, log = TRUE)) - sum(dPSI(epsilon, log = TRUE)) + sum(log_f)
}
}
## Estimates --------------------------------------------------------
opt <- stats::optim(par = inits, fn = ll, method = method, control = control, hessian = FALSE)
# Assymptotic covariance estimates
if (hessian) {
J <- -numDeriv::hessian(ll, opt$par)
vcov <- try(solve(J), silent = TRUE)
opt$vcov <- if (unique(grepl("Error", vcov))) matrix(NA, nrow = length(opt$par), ncol = length(opt$par)) else vcov
}
if (opt$convergence > 0) {
warning(cat("optimization failed to converge\n"))
}
opt$par_id <- par_id
opt$start <- inits
lambda <- rep(NA, d)
lambda[lambda_id] <- opt$par[par_id$lambda]
nu <- rep(NA, d)
nu[nu_id] <- opt$par[par_id$nu]
# Out list
out <- list(
mu = opt$par[par_id$mu],
sigma = opt$par[par_id$sigma],
lambda = lambda,
nu = nu,
gamma = if (association$npar > 0L) opt$par[par_id$gamma] else NULL,
margins = margins,
association = association$name,
copula = copula,
delta = delta,
logLik = opt$value,
vcov = if (hessian) opt$vcov else NULL,
y = y,
optim_params = opt,
nobs = n,
d = d,
call = cl
)
class(out) <- "bcnsm"
out
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.