R/gamCopula-package.R

#' gamCopula: Generalized Additive Models for Bivariate Conditional Dependence Structures and Vine Copulas
#'
#' This package implements inference and simulation tools to apply generalized additive models to bivariate dependence structures and vine copulas.
#'
#' More details can be found in Vatter and Chavez-Demoulin (2015) and Vatter and Nagler (2016).
#'
#' @details
#' \tabular{ll}{
#' Package: \tab gamCopula\cr
#' Type: \tab Package\cr
#' Version: \tab 0.0-8\cr
#' Date: \tab 2025-04-02\cr
#' License: \tab GPL-3\cr
#' }
#'
#' @references
#' Aas, K., C. Czado, A. Frigessi, and H. Bakken (2009)
#' Pair-copula constructions of multiple dependence.
#' \emph{Insurance: Mathematics and Economics}, 44(2), 182--198.
#'
#' Brechmann, E. C., C. Czado, and K. Aas (2012)
#' Truncated regular vines in high dimensions with applications to financial data.
#' \emph{Canadian Journal of Statistics}, 40(1), 68--85.
#'
#' Dissmann, J. F., E. C. Brechmann, C. Czado, and D. Kurowicka (2013)
#' Selecting and estimating regular vine copulae and application to financial returns.
#' \emph{Computational Statistics & Data Analysis}, 59(1), 52--69.
#'
#' Vatter, T. and V. Chavez-Demoulin (2015)
#' Generalized Additive Models for Conditional Dependence Structures.
#' \emph{Journal of Multivariate Analysis}, 141, 147--167.
#'
#' Vatter, T. and T. Nagler (2016)
#' Generalized additive models for non-simplified pair-copula constructions.
#' \url{https://arxiv.org/abs/1608.01593}
#'
#' Wood, S. N. (2004)
#' Stable and efficient multiple smoothing parameter estimation for generalized additive models.
#' \emph{Journal of the American Statistical Association}, 99, 673--686.
#'
#' Wood, S. N. (2006)
#' Generalized Additive Models: an introduction with R.
#' Chapman and Hall/CRC.
#'
#' @author
#' Thibault Vatter and Thomas Nagler
#'
#' Maintainer: Thibault Vatter \email{thibault.vatter@gmail.com}
#'
#' @seealso
#' The present package heavily relies on the \pkg{mgcv} and \pkg{VineCopula} packages, as it basically extends and mixes both of them.
#'
#' @examples
#' ##### A gamBiCop example
#' require(copula)
#' require(mgcv)
#' set.seed(0)
#'
#' ## Simulation parameters (sample size, correlation between covariates,
#' ## Gaussian copula family)
#' n <- 5e2
#' rho <- 0.5
#' fam <- 1
#'
#' ### A calibration surface depending on three variables
#' eta0 <- 1
#' calib.surf <- list(
#'   calib.quad <- function(t, Ti = 0, Tf = 1, b = 8) {
#'     Tm <- (Tf - Ti) / 2
#'     a <- -(b / 3) * (Tf^2 - 3 * Tf * Tm + 3 * Tm^2)
#'     return(a + b * (t - Tm)^2)
#'   },
#'   calib.sin <- function(t, Ti = 0, Tf = 1, b = 1, f = 1) {
#'     a <- b * (1 - 2 * Tf * pi / (f * Tf * pi +
#'       cos(2 * f * pi * (Tf - Ti))
#'       - cos(2 * f * pi * Ti)))
#'     return((a + b) / 2 + (b - a) * sin(2 * f * pi * (t - Ti)) / 2)
#'   },
#'   calib.exp <- function(t, Ti = 0, Tf = 1, b = 2, s = Tf / 8) {
#'     Tm <- (Tf - Ti) / 2
#'     a <- (b * s * sqrt(2 * pi) / Tf) * (pnorm(0, Tm, s) - pnorm(Tf, Tm, s))
#'     return(a + b * exp(-(t - Tm)^2 / (2 * s^2)))
#'   }
#' )
#' ### Display the calibration surface
#' par(mfrow = c(1, 3), pty = "s", mar = c(1, 1, 4, 1))
#' u <- seq(0, 1, length.out = 100)
#' sel <- matrix(c(1, 1, 2, 2, 3, 3), ncol = 2)
#' jet.colors <- colorRamp(c(
#'   "#00007F", "blue", "#007FFF", "cyan", "#7FFF7F",
#'   "yellow", "#FF7F00", "red", "#7F0000"
#' ))
#' jet <- function(x) {
#'   rgb(jet.colors(exp(x / 3) / (1 + exp(x / 3))),
#'     maxColorValue = 255
#'   )
#' }
#' for (k in 1:3) {
#'   tmp <- outer(u, u, function(x, y) {
#'     eta0 + calib.surf[[sel[k, 1]]](x) + calib.surf[[sel[k, 2]]](y)
#'   })
#'   persp(u, u, tmp,
#'     border = NA, theta = 60, phi = 30, zlab = "",
#'     col = matrix(jet(tmp), nrow = 100),
#'     xlab = paste("X", sel[k, 1], sep = ""),
#'     ylab = paste("X", sel[k, 2], sep = ""),
#'     main = paste("eta0+f", sel[k, 1],
#'       "(X", sel[k, 1], ") +f", sel[k, 2],
#'       "(X", sel[k, 2], ")",
#'       sep = ""
#'     )
#'   )
#' }
#' ### 3-dimensional matrix X of covariates
#' covariates.distr <- mvdc(normalCopula(rho, dim = 3),
#'   c("unif"), list(list(min = 0, max = 1)),
#'   marginsIdentical = TRUE
#' )
#' X <- rMvdc(n, covariates.distr)
#' ### U in [0,1]x[0,1] with copula parameter depending on X
#' U <- condBiCopSim(fam, function(x1, x2, x3) {
#'   eta0 + sum(mapply(function(f, x) {
#'     f(x)
#'   }, calib.surf, c(x1, x2, x3)))
#' }, X[, 1:3], par2 = 6, return.par = TRUE)
#' ### Merge U and X
#' data <- data.frame(U$data, X)
#' names(data) <- c(paste("u", 1:2, sep = ""), paste("x", 1:3, sep = ""))
#' ### Display the data
#' dev.off()
#' plot(data[, "u1"], data[, "u2"], xlab = "U1", ylab = "U2")
#' ### Model fit with a basis size (arguably) too small
#' ### and unpenalized cubic spines
#' pen <- FALSE
#' basis0 <- c(3, 4, 4)
#' formula <- ~ s(x1, k = basis0[1], bs = "cr", fx = !pen) +
#'   s(x2, k = basis0[2], bs = "cr", fx = !pen) +
#'   s(x3, k = basis0[3], bs = "cr", fx = !pen)
#' system.time(fit0 <- gamBiCopFit(data, formula, fam))
#' ### Model fit with a better basis size and penalized cubic splines (via min GCV)
#' pen <- TRUE
#' basis1 <- c(3, 10, 10)
#' formula <- ~ s(x1, k = basis1[1], bs = "cr", fx = !pen) +
#'   s(x2, k = basis1[2], bs = "cr", fx = !pen) +
#'   s(x3, k = basis1[3], bs = "cr", fx = !pen)
#' system.time(fit1 <- gamBiCopFit(data, formula, fam))
#' ### Extract the gamBiCop objects and show various methods
#' (res <- sapply(list(fit0, fit1), function(fit) {
#'   fit$res
#' }))
#' metds <- list("logLik" = logLik, "AIC" = AIC, "BIC" = BIC, "EDF" = EDF)
#' lapply(res, function(x) sapply(metds, function(f) f(x)))
#' ### Comparison between fitted, true smooth and spline approximation for each
#' ### true smooth function for the two basis sizes
#' fitted <- lapply(res, function(x) {
#'   gamBiCopPredict(x, data.frame(x1 = u, x2 = u, x3 = u),
#'     type = "terms"
#'   )$calib
#' })
#' true <- vector("list", 3)
#' for (i in 1:3) {
#'   y <- eta0 + calib.surf[[i]](u)
#'   true[[i]]$true <- y - eta0
#'   temp <- gam(y ~ s(u, k = basis0[i], bs = "cr", fx = TRUE))
#'   true[[i]]$approx <- predict.gam(temp, type = "terms")
#'   temp <- gam(y ~ s(u, k = basis1[i], bs = "cr", fx = FALSE))
#'   true[[i]]$approx2 <- predict.gam(temp, type = "terms")
#' }
#' ### Display results
#' par(mfrow = c(1, 3), pty = "s")
#' yy <- range(true, fitted)
#' yy[1] <- yy[1] * 1.5
#' for (k in 1:3) {
#'   plot(u, true[[k]]$true,
#'     type = "l", ylim = yy,
#'     xlab = paste("Covariate", k), ylab = paste("Smooth", k)
#'   )
#'   lines(u, true[[k]]$approx, col = "red", lty = 2)
#'   lines(u, fitted[[1]][, k], col = "red")
#'   lines(u, fitted[[2]][, k], col = "green")
#'   lines(u, true[[k]]$approx2, col = "green", lty = 2)
#'   legend("bottomleft",
#'     cex = 0.6, lty = c(1, 1, 2, 1, 2),
#'     c("True", "Fitted", "Appox 1", "Fitted 2", "Approx 2"),
#'     col = c("black", "red", "red", "green", "green")
#'   )
#' }
#' ###### A gamVine example
#' set.seed(0)
#' ###  Simulation parameters
#' ## Sample size
#' n <- 1e3
#' ## Copula families
#' familyset <- c(1:2, 301:304, 401:404)
#' ## Define a 4-dimensional R-vine tree structure matrix
#' d <- 4
#' Matrix <- c(2, 3, 4, 1, 0, 3, 4, 1, 0, 0, 4, 1, 0, 0, 0, 1)
#' Matrix <- matrix(Matrix, d, d)
#' nnames <- paste("X", 1:d, sep = "")
#' ### A function factory
#' eta0 <- 1
#' calib.surf <- list(
#'   calib.quad <- function(t, Ti = 0, Tf = 1, b = 8) {
#'     Tm <- (Tf - Ti) / 2
#'     a <- -(b / 3) * (Tf^2 - 3 * Tf * Tm + 3 * Tm^2)
#'     return(a + b * (t - Tm)^2)
#'   },
#'   calib.sin <- function(t, Ti = 0, Tf = 1, b = 1, f = 1) {
#'     a <- b * (1 - 2 * Tf * pi / (f * Tf * pi +
#'       cos(2 * f * pi * (Tf - Ti))
#'       - cos(2 * f * pi * Ti)))
#'     return((a + b) / 2 + (b - a) * sin(2 * f * pi * (t - Ti)) / 2)
#'   },
#'   calib.exp <- function(t, Ti = 0, Tf = 1, b = 2, s = Tf / 8) {
#'     Tm <- (Tf - Ti) / 2
#'     a <- (b * s * sqrt(2 * pi) / Tf) * (pnorm(0, Tm, s) - pnorm(Tf, Tm, s))
#'     return(a + b * exp(-(t - Tm)^2 / (2 * s^2)))
#'   }
#' )
#' ###  Create the model
#' ## Define gam-vine model list
#' count <- 1
#' model <- vector(mode = "list", length = d * (d - 1) / 2)
#' sel <- seq(d, d^2 - d, by = d)
#' ## First tree
#' for (i in 1:(d - 1)) {
#'   # Select a copula family
#'   family <- sample(familyset, 1)
#'   model[[count]]$family <- family
#'   # Use the canonical link and a randomly generated parameter
#'   if (is.element(family, c(1, 2))) {
#'     model[[count]]$par <- tanh(rnorm(1) / 2)
#'     if (family == 2) {
#'       model[[count]]$par2 <- 2 + exp(rnorm(1))
#'     }
#'   } else {
#'     if (is.element(family, c(401:404))) {
#'       rr <- rnorm(1)
#'       model[[count]]$par <- sign(rr) * (1 + abs(rr))
#'     } else {
#'       model[[count]]$par <- rnorm(1)
#'     }
#'     model[[count]]$par2 <- 0
#'   }
#'   count <- count + 1
#' }
#' ## A dummy dataset
#' data <- data.frame(u1 = runif(1e2), u2 = runif(1e2), matrix(runif(1e2 * d), 1e2, d))
#' ## Trees 2 to (d-1)
#' for (j in 2:(d - 1)) {
#'   for (i in 1:(d - j)) {
#'     # Select a copula family
#'     family <- sample(familyset, 1)
#'     # Select the conditiong set and create a model formula
#'     cond <- nnames[sort(Matrix[(d - j + 2):d, i])]
#'     tmpform <- paste("~", paste(paste("s(", cond, ", k=10, bs='cr')",
#'       sep = ""
#'     ), collapse = " + "))
#'     l <- length(cond)
#'     temp <- sample(3, l, replace = TRUE)
#'     # Spline approximation of the true function
#'     m <- 1e2
#'     x <- matrix(seq(0, 1, length.out = m), nrow = m, ncol = 1)
#'     if (l != 1) {
#'       tmp.fct <- paste("function(x){eta0+",
#'         paste(sapply(1:l, function(x) {
#'           paste("calib.surf[[", temp[x], "]](x[", x, "])",
#'             sep = ""
#'           )
#'         }), collapse = "+"), "}",
#'         sep = ""
#'       )
#'       tmp.fct <- eval(parse(text = tmp.fct))
#'       x <- eval(parse(text = paste0("expand.grid(",
#'         paste0(rep("x", l), collapse = ","), ")",
#'         collapse = ""
#'       )))
#'       y <- apply(x, 1, tmp.fct)
#'     } else {
#'       tmp.fct <- function(x) eta0 + calib.surf[[temp]](x)
#'       colnames(x) <- cond
#'       y <- tmp.fct(x)
#'     }
#'     # Estimate the gam model
#'     form <- as.formula(paste0("y", tmpform))
#'     dd <- data.frame(y, x)
#'     names(dd) <- c("y", cond)
#'     b <- gam(form, data = dd)
#'     # plot(x[,1],(y-fitted(b))/y)
#'     # Create a dummy gamBiCop object
#'     tmp <- gamBiCopFit(data = data, formula = form, family = 1, n.iters = 1)$res
#'     # Update the copula family and the model coefficients
#'     attr(tmp, "model")$coefficients <- coefficients(b)
#'     attr(tmp, "model")$smooth <- b$smooth
#'     attr(tmp, "family") <- family
#'     if (family == 2) {
#'       attr(tmp, "par2") <- 2 + exp(rnorm(1))
#'     }
#'     model[[count]] <- tmp
#'     count <- count + 1
#'   }
#' }
#' ## Create the gamVineCopula object
#' GVC <- gamVine(Matrix = Matrix, model = model, names = nnames)
#' print(GVC)
#' #
#' \dontrun{
#' ### Simulate and fit the model
#' sim <- gamVineSimulate(n, GVC)
#' fitGVC <- gamVineSeqFit(sim, GVC, verbose = TRUE)
#' fitGVC2 <- gamVineCopSelect(sim, Matrix, verbose = TRUE)
#' ### Plot the results
#' par(mfrow = c(3, 4))
#' plot(GVC, ylim = c(-2.5, 2.5))
#' plot(fitGVC, ylim = c(-2.5, 2.5))
#' plot(fitGVC2, ylim = c(-2.5, 2.5))
#' }
#' @name gamCopula-package
#' @aliases gamCopula-package gamCopula gamCopula.package
"_PACKAGE"

Try the gamCopula package in your browser

Any scripts or data that you put into this service are public.

gamCopula documentation built on April 4, 2025, 1:10 a.m.