R/03_bcnsm_distributions.R

Defines functions mvplot make_copula rbcnsm dbcnsm

Documented in dbcnsm mvplot rbcnsm

#' @name bcnsm
#'
#' @title The Class of the Multivariate Box-Cox Symmetric Distributions Generating
#'     by a Normal Scale Mixture Copula
#'
#' @description These functions provide the joint probability density function
#'     and a random generator for the class of the multivariate Box-Cox symmetric
#'     distributions generated by a normal scale mixture copula.
#'
#' @param x vector or matrix of non-negative quantiles. If \code{x} is a matrix,
#'     each row is taken to be a quantile.
#' @param n number of replicates to return.
#' @param mu vector of the marginal scale parameters in \code{(0, infty)^d}, where \code{d} is the
#'     dimension.
#' @param sigma vector of the marginal relative dispersion parameters in \code{(0, infty)^d}, where \code{d} is the
#'     dimension.
#' @param lambda vector of the marginal skewness parameters in \code{R^d}, where \code{d} is the
#'     dimension.
#' @param nu vector of possible extra parameters of the marginal distributions. In the case where
#'     no marginal is indexed by \code{nu}, it must be specified with \code{NA}.
#' @param Gamma the association matrix. It must be a positive-definite correlation matrix, default is
#'     \code{diag(ncol(x))} (the so-called non-associative association structure).
#' @param copula character; informs which normal scale mixture distribution
#'     should be used to generate the NSM copula. Currently,
#'     the copulas available are: Gaussian (\code{"gaussian"}),
#'     Student's t (\code{"t"}), slash (\code{"slash"}), and hyperbolic (\code{"hyp"}).
#' @param delta possible extra parameter associated with the mixing distribution of the
#'     copula. For example, the degrees of freedom of the \code{t} copula, or the heavy tailness
#'     parameter of the slash or the hyperbolic copula.
#' @param margins a character or a character vector; specifies the marginal BCS distributions. If all
#'     BCS margins are the same, it is sufficient to enter only one character. A table with the
#'     current available BCS distributions can be seen in \code{\link[bcnsm]{bcs}}.
#' @param log logical; if \code{TRUE}, probabilities \code{p} are given
#'     as \code{log(p)}.
#'
#' @return \code{dbcnsm} returns the evaluated joint density function and
#'      \code{rbcnsm} generates random values.
#'
#' @references
#'
#'  Vanegas, L. H., and Paula, G. A. (2016). Log-symmetric distributions:
#'      statistical properties and parameter estimation. \emph{Brazilian Journal of
#'      Probability and Statistics}, 30, 196--220.
#'
#'  Ferrari, S. L. P., and Fumes, G. (2017). Box-Cox symmetric distributions and
#'      applications to nutritional data. \emph{AStA Advances in Statistical Analysis},
#'      101, 321--344.
#'
#' @examples
#' ### Sample size and dimension
#' n <- 1000
#' d <- 4
#'
#' ### Association matrix
#' Gamma <- matrix(0.8, d, d)
#' diag(Gamma) <- 1
#'
#' ### Marginal specifications
#'
#' # Marginals
#' margins <- c("bchp", "bcsl", "bcpe", "bcloii")
#'
#' # Marginal parameters
#' mu <- c(19, 20, 15, 20)
#' sigma <- c(0.2, 0.6, 0.4, 0.3)
#' lambda <- c(-1, 1.2, 0, 1.6)
#' nu <- c(6, 4, 8, NA)
#'
#' ### Copula
#' copula <- "slash"
#' delta <- 3
#'
#' ### Generating observations
#' y <- rbcnsm(n, mu, sigma, lambda, nu, Gamma, copula, delta, margins)
#'
#' mvplot(y)  ## See ?mvplot for documentation
#'
#' @author Rodrigo M. R. Medeiros <\email{rodrigo.matheus@live.com}>
#'

# Joint density ------------------------------------------------------------------------------------
#' @rdname bcnsm
#' @export
dbcnsm <- function(x, mu, sigma, lambda, nu = NULL, Gamma = diag(ncol(x)),
                   copula = c("gaussian", "t", "slash", "hyp"),
                   delta = NULL, margins = "bcno", log = FALSE) {

  ### Setting dimensions
  if (is.vector(x))
    x <- matrix(x, ncol = length(x))

  n <- dim(x)[1]
  d <- dim(x)[2]

  ### Marginal parameters
  mu <- matrix(mu, ncol = d)
  sigma <- matrix(sigma, ncol = d)
  lambda <- matrix(lambda, ncol = d)
  aux_nu <- as.numeric(stats::na.exclude(nu))

  mu <- do.call(rbind, replicate(n / dim(mu)[1], mu, simplify = FALSE))
  sigma <- do.call(rbind, replicate(n / dim(sigma)[1], sigma, simplify = FALSE))
  lambda <- do.call(rbind, replicate(n / dim(lambda)[1], lambda, simplify = FALSE))

  margins <- as.vector(matrix(margins, 1, d))

  nu <- rep(NA, d)
  nu_id <- apply(matrix(margins, ncol = 1), 1, function(x) as.bcs(x)$extrap)
  nu[nu_id] <- aux_nu

  ### Copula
  copula <- match.arg(copula, c("gaussian", "t", "slash", "hyp"))
  copula_inf <- make_copula(copula, delta)

  dgf <- copula_inf$dgf
  qPSI <- copula_inf$qPSI

  EPS <- .Machine$double.eps^(1 / 2)
  w <- log_f <- matrix(NA, n, d)
  for (j in 1:d) {

    w[, j] <- pmin(pmax(get(paste0("p", margins[j]))(q = x[, j],
                                                     mu = mu[, j],
                                                     sigma = sigma[, j],
                                                     lambda = lambda[, j],
                                                     nu = nu[j]), EPS), 1 - EPS)

    log_f[, j] <- get(paste0("d", margins[j]))(x = x[, j],
                                               mu = mu[, j],
                                               sigma = sigma[, j],
                                               lambda = lambda[, j],
                                               nu = nu[j], log = TRUE)

  }

  epsilon <- matrix(qPSI(w), ncol = d)
  dec <- Rfast::cholesky(Gamma)

  tmp <- backsolve(dec, t(epsilon), transpose = TRUE)
  rss <- colSums(tmp^2)

  den <- -sum(log(diag(dec))) + dgf(rss, d, log = TRUE) -
    rowSums(matrix(dgf(epsilon^2, 1L, log = TRUE), ncol = d)) + rowSums(log_f)

  if (log) {
    den
  } else {
    exp(den)
  }
}

# Random generation --------------------------------------------------------------------------------
#' @rdname bcnsm
#' @export
rbcnsm <- function(n, mu, sigma, lambda, nu, Gamma = diag(ncol(x)),
                   copula = c("gaussian", "t", "slash", "hyp"),
                   delta = NULL, margins = "bcno") {
  d <- ncol(Gamma)

  ### Marginal parameters
  mu <- matrix(mu, ncol = d)
  sigma <- matrix(sigma, ncol = d)
  lambda <- matrix(lambda, ncol = d)

  mu <- do.call(rbind, replicate(n / dim(mu)[1], mu, simplify = FALSE))
  sigma <- do.call(rbind, replicate(n / dim(sigma)[1], sigma, simplify = FALSE))
  lambda <- do.call(rbind, replicate(n / dim(lambda)[1], lambda, simplify = FALSE))
  aux_nu <- as.numeric(stats::na.exclude(nu))

  margins <- as.vector(matrix(margins, 1, d))

  nu <- rep(NA, d)
  nu_id <- apply(matrix(margins, ncol = 1), 1, function(x) as.bcs(x)$extrap)
  nu[nu_id] <- aux_nu

  ## Sampling from NSM distribution
  copula <- match.arg(copula, c("gaussian", "t", "slash", "hyp"))
  mcopula <- make_copula(copula, delta)

  epsilon <- get(paste0("rmv_", copula))(n, rep(0, d), Gamma, delta)

  w <- matrix(mcopula$pPSI(epsilon), ncol = d)

  ## Sampling for the multivariate distribution
  x <- matrix(NA, n, d)
  for (j in 1:d) {

    x[, j] <- get(paste0("q", margins[j]))(p = w[, j],
                                           mu = mu[, j],
                                           sigma = sigma[, j],
                                           lambda = lambda[, j],
                                           nu = nu[j])
  }

  if (n == 1) x <- as.numeric(x)

  x
}



# Copula information -------------------------------------------------------------------------------
make_copula <- function(copula, delta) {

  switch(copula,

         gaussian = {

           dPSI <- function(x, log = FALSE) stats::dnorm(x, log = log)
           pPSI <- function(q) stats::pnorm(q)
           qPSI <- function(p) stats::qnorm(p)
           dgf <- function(u, d, log = FALSE) {
             out <- -0.5 * u - (d/2) * log(2 * pi)

             if (!log) out <- exp(out)

             out
           }

         },

         t = {

           dPSI <- function(x, log = FALSE) stats::dt(x, delta, log = log)
           pPSI <- function(q) stats::pt(q, delta)
           qPSI <- function(p) stats::qt(p, delta)
           dgf <- function(u, d, log = FALSE){

             out <- lgamma(0.5 * (delta + d)) - 0.5 * (delta + d) * log(1 + u / delta) -
               lgamma(delta / 2) - (d/2) * log(delta * pi)

             if (!log) out <- exp(out)

             out
           }

         },

         slash = {

           Wsl <- distr::AbscontDistribution(
             d = function(x) dslash(x, nu = delta),
             Symmetry = distr::SphericalSymmetry(0)
           )

           dPSI <- function(x, log = FALSE) dslash(x, nu = delta, log = log)
           pPSI <- function(q) distr::p(Wsl)(q)
           qPSI <- function(p) distr::q(Wsl)(p)
           dgf <- function(u, d, log = FALSE){

             id1 <- which(u == 0)
             id2 <- which(u != 0)

             out <- vector("numeric", length(u))

             out[id1] <- log(2 * delta) - (d/2) * log(2 * pi) - log(2 * delta + d)
             out[id2] <- log(delta) + delta * log(2) + log(ig(delta + d / 2, u[id2] / 2)) -
               (d/2) * log(pi) - (delta + d/2) * log(u[id2])

             if (!log) out <- exp(out)

             out
           }

         },

         hyp = {

           Whp <- distr::AbscontDistribution(
             d = function(x) dhyp(x, nu = delta),
             Symmetry = distr::SphericalSymmetry(0)
           )

           dPSI <- function(x, log = FALSE) dhyp(x, nu = delta, log = log)
           pPSI <- function(q) distr::p(Whp)(q)
           qPSI <- function(p) distr::q(Whp)(p)
           dgf <- function(u, d, log = FALSE){

             out <- (d - 1) * log(delta) + log(besselK(delta * sqrt(1 + u), nu = 1 - d/2)) -
               (d/2) * log(2 * pi) - log(besselK(delta, nu = 1)) -
               (d/2 - 1) * log(delta * sqrt(1 + u))

             if (!log) out <- exp(out)

             out
           }

         },

         stop(gettextf("%s copula not recognised", sQuote(copula)), domain = NA))

  list(dPSI = dPSI, pPSI = pPSI, qPSI = qPSI, dgf = dgf)

}



# make_copula <- function(copula, delta) {
#
#   switch(copula,
#
#          gaussian = {
#            dPSI <- function(x, log = FALSE) stats::dnorm(x, log = log)
#            pPSI <- function(q) stats::pnorm(q)
#            qPSI <- function(p) stats::qnorm(p)
#            #dmv <- function(x, Sigma, log = FALSE) dmv_gaussian(x, rep(0L, ncol(Sigma)), Sigma = Sigma, log = log)
#            dmv <- function(x, Sigma, log = FALSE){
#              mvnfast::dmvn(x, mu = rep(0L, ncol(Sigma)), sigma = Sigma, log = log, ncores = 2)
#            }
#
#          },
#
#          t = {
#            dPSI <- function(x, log = FALSE) stats::dt(x, df = delta, log = log)
#            pPSI <- function(q) stats::pt(q, df = delta)
#            qPSI <- function(p) stats::qt(p, df = delta)
#            #dmv <- function(x, Sigma, log = FALSE) dmv_t(x, rep(0L, ncol(Sigma)), Sigma = Sigma, delta = delta, log = log)
#            dmv <- function(x, Sigma, log = FALSE){
#              mvnfast::dmvt(x, mu = rep(0L, ncol(Sigma)), sigma = Sigma, log = log, df = delta, ncores = 2)
#            }
#          },
#
#          slash = {
#            dPSI <- function(x, log = FALSE) dslash(x, nu = delta, log = log)
#            pPSI <- function(q) pslash(q, nu = delta)
#            qPSI <- function(p) qslash(p, nu = delta)
#            dmv <- function(x, Sigma, log = FALSE) dmv_slash(x, rep(0L, ncol(Sigma)), Sigma = Sigma, delta = delta, log = log)
#          },
#
#          hyp = {
#            dPSI <- function(x, log = FALSE) dhyp(x, nu = delta, log = log)
#            pPSI <- function(q) phyp(q, nu = delta)
#            qPSI <- function(p) qhyp(p, nu = delta)
#            dmv <- function(x, Sigma, log = FALSE) NULL #dmv_hyp(x, rep(0L, d), Sigma = Sigma, delta = delta, log = log)
#          },
#
#          stop(gettextf("%s copula not recognised", sQuote(copula)), domain = NA))
#
#   list(dPSI = dPSI, pPSI = pPSI, qPSI = qPSI, dmv = dmv)
#
# }


# Comparative plot function ------------------------------------------------------------------------
#' Visualize Multivariate Data
#'
#' This function takes a matrix of quantitative data and generates a panel plot.
#' The diagonal panels contains histograms for each variable, the lower panels
#' display pairwise scatter plots, and the upper panels show some association
#' measure, such as Kendall's tau, Spearman's rho, or Pearson correlation coefficient.
#'
#' @param y a matrix or a data frame of quantitative data with variables in columns.
#' @param method a character string indicating which association coefficient
#'    is to be computed for the upper panels of the plot. It is computed
#'    via [cor](stats::cor) function. One of \code{"kendall"} (default),
#'    \code{"pearson"}, or \code{"spearman"} can be used.
#' @param ... arguments to be passed to or from methods.
#'
#' @return A panel plot displaying histograms on the diagonal, scatterplots in the
#' lower panels, and Kendall's tau correlation in the upper panels.
#'
#' @export
#'
#' @author Rodrigo M. R. Medeiros <\email{rodrigo.matheus@live.com}>
#'
#' @examples
#' data <- data.frame(x1 = rnorm(1000),
#'                    x2 = rgamma(1000, 2, 4),
#'                    x3 = rbeta(1000, 4, 1),
#'                    x5 = rlnorm(1000, 3, 0.5))
#'
#' mvplot(data)
#'
#'
mvplot <- function(y, method = c("kendall", "spearman", "pearson"), ...) {

  method <- match.arg(method, c("kendall", "spearman", "pearson"))

  if (requireNamespace("ggplot2", quietly = TRUE) &
      requireNamespace("GGally", quietly = TRUE)) {

    if (method == "kendall") {
      method_name <- "Kendall's tau"
    } else if (method == "spearman") {
      method_name <- "Spearman's rho"
    } else if (method == "pearson") {
      method_name <- "Linear correlation"
    }

    # Defines function to color according to correlation
    cor_func <- function(data, mapping, ...){

      x <- GGally::eval_data_col(data, mapping$x)
      y <- GGally::eval_data_col(data, mapping$y)

      corr <- stats::cor(x, y, method = method, use = 'complete.obs')

      colFn <- grDevices::colorRampPalette(c("brown1", "white", "dodgerblue"), interpolate ='spline')
      fill <- colFn(1000)[findInterval(corr, seq(-1, 1, length = 1000))]

      GGally::ggally_text(
        label = as.character(round(corr, 4)),
        mapping = ggplot2::aes(),
        xP = 0.5, yP = 0.5,
        cex = 2.5,
        color = 'black', ...) +
        ggplot2::theme_void() +
        ggplot2::theme(panel.background = ggplot2::element_rect(fill = fill))
    }


    u <- stats::runif(200, -1, 1)
    u[which.min(u)] <- -1
    u[which.max(u)] <- 1
    colFn <- grDevices::colorRampPalette(c("brown1", "white", "dodgerblue"), interpolate ='spline')
    lab <- ggplot2::ggplot(data.frame(x = u, y = u, z = u), ggplot2::aes(x, y, colour = z)) +
      ggplot2::geom_point() +
      ggplot2::scale_colour_gradient2(method_name,
                                      low = colFn(200)[1], high = colFn(200)[200]) +
      ggplot2::theme(legend.title.align = 0.5,
                     legend.position = "top",
                     legend.key.height = ggplot2::unit(0.3, 'cm'),
                     legend.key.width = ggplot2::unit(1.5, 'cm'))


    GGally::ggpairs(as.data.frame(y),
                    lower = list(continuous = "points", combo = "facethist", discrete = "facetbar"),
                    upper = list(continuous = GGally::wrap(cor_func)),
                    diag = list(continuous = GGally::wrap("barDiag",
                                                          colour = 1,
                                                          fill = "white",
                                                          bins = ceiling(1 + 3.33 * log(nrow(y))) )),
                    progress = FALSE)

  } else {

    ## put histograms on the diagonal
    panel.hist <- function(x, ...) {

      usr <- graphics::par("usr")
      on.exit(graphics::par(usr = usr))
      h <- graphics::hist(x, plot = FALSE)
      breaks <- h$breaks
      nB <- length(breaks)
      y <- h$density
      graphics::par(usr = c(usr[1:2], 0, max(y) + 0.5 * diff(range(y))))
      graphics::rect(breaks[-nB], 0, breaks[-1], y, col = "white", ...)

    }

    ## put an association measure on the upper panels
    panel.cor <- function(x, y, ...) {

      usr <- graphics::par("usr")
      on.exit(graphics::par(usr = usr))

      r <- stats::cor(x, y, method = method)

      colFn <- grDevices::colorRampPalette(c("brown1", "white", "dodgerblue"), interpolate = "spline")
      fill <- colFn(1000)[findInterval(r, seq(-1, 1, length = 1000))]

      graphics::par(usr = c(0, 1, 0, 1))
      txt <- format(c(r, 0.123456789), digits = 2)[1]
      txt <- paste0(txt)
      graphics::rect(0, 0, 1, 1, col = fill, border = "black")
      graphics::text(0.5, 0.5, txt, cex = 1.2)

    }

    ## put scatterplots with countour lines on the lower panels
    panel.scatter <- function(x, y, ...) {

      usr <- graphics::par("usr")
      on.exit(graphics::par(usr = usr))
      graphics::points(x, y, pch = 16, cex = 0.8)

    }

    graphics::pairs(y, lower.panel = panel.scatter, upper.panel = panel.cor, diag.panel = panel.hist,
                    labels = colnames(y), cex.labels = 1.1, font.labels = 2)


  }


}

globalVariables(c("x", "z"))
rdmatheus/BCNSM documentation built on Feb. 8, 2024, 1:28 a.m.