R/rtheta.R
In GMCM: Fast Estimation of Gaussian Mixture Copula Models

Documented in rtheta

#' Get random parameters for the Gaussian mixture (copula) model
#'
#' Generate a random set parameters for the Gaussian mixture
#' model (GMM) and Gaussian mixture copula model (GMCM). Primarily, it provides
#' an easy prototype of the \code{theta}-format used in \pkg{GMCM}.
#'
#' @param m The number of components in the mixture.
#' @param d The dimension of the mixture distribution.
#' @param method The method by which the theta should be generated.
#'   See details. Defaults to \code{"old"} which is the regular "old" behavior.
#' @return A named list of parameters with the 4 elements:
#'   \item{\code{m}}{An integer giving the number of components in the mixture.
#'                   Default is 3.}
#'   \item{\code{d}}{An integer giving the dimension of the mixture
#'                   distribution. Default is 2.}
#'   \item{\code{pie}}{A numeric vector of length \code{m} of mixture
#'                    proportions between 0 and 1 which sums to one.}
#'   \item{\code{mu}}{A \code{list} of length \code{m} of numeric vectors of
#'                    length \code{d} for each component.}
#'   \item{\code{sigma}}{A \code{list} of length \code{m} of variance-covariance
#'                       matrices (of size \code{d} times \code{d}) for each
#'                       component.}
#' @note The function \code{\link{is.theta}} checks whether or not \code{theta}
#'   is in the correct format.
#' @details
#'   Depending on the \code{method} argument the parameters are generated as
#'   follows. The new behavior is inspired by the simulation scenarios in
#'   Friedman (1989) but not exactly the same.
#' \itemize{
#'   \item{\code{pie}
#'         is generated by \eqn{m} draws of a chi-squared distribution with
#'         \eqn{3m} degrees of freedom divided by their sum. If
#'         \code{method = "old"} the uniform distribution is used instead.}
#'   \item{\code{mu}
#'         is generated by \eqn{m} i.i.d. \eqn{d}-dimensional zero-mean
#'         normal vectors with covariance matrix \code{100I}.
#'         (unchanged from the old behavior)}
#'   \item{\code{sigma}}{
#'     is dependent on \code{method}. The covariance matrices for each
#'     component are generated as follows. If the \code{method} is
#'     \itemize{
#'       \item{\code{"EqualSpherical"}, then the covariance matrices are the
#'             identity matrix and thus are all equal and spherical.}
#'       \item{\code{"UnequalSpherical"}, then the covariance matrices are
#'             scaled identity matrices. In component \eqn{h}, the covariance
#'             matrix is \eqn{hI}}
#'       \item{\code{"EqualEllipsoidal"}, then highly elliptical covariance
#'             matrices which equal for all components are used.
#'             The square root of the \eqn{d} eigenvalues are chosen
#'             equidistantly on
#'             the interval \eqn{10} to \eqn{1} and a randomly (uniformly)
#'             oriented orthonormal basis is chosen and used for all
#'             components.}
#'       \item{\code{"UnqualEllipsoidal"}, then highly elliptical covariance
#'             matrices different for all components are used.
#'             The eigenvalues of the covariance matrices equal as in all
#'             components as in \code{"EqualEllipsoidal"}. However, they are all
#'             randomly (uniformly) oriented (unlike as described in
#'             Friedman (1989)).}
#'       \item{\code{"old"}, then the old behavior is used.
#'             The old behavior differs from \code{"EqualEllipsoidal"} by using
#'             the absolute value of \eqn{d} zero-mean i.i.d. normal
#'             eigenvalues with a standard deviation of 8.}
#'     }}
#'   In all cases, the orientation is selected uniformly.
#' }
#' @author Anders Ellern Bilgrau <anders.ellern.bilgrau@@gmail.com>
#' @references
#'   Friedman, Jerome H. "Regularized discriminant analysis." Journal of the
#'   American statistical association 84.405 (1989): 165-175.
#' @seealso \code{\link{is.theta}}
#' @examples
#' rtheta()
#'
#' rtheta(d = 5, m = 2)
#'
#' rtheta(d = 3, m = 2, method = "EqualEllipsoidal")
#'
#' test <- rtheta()
#' is.theta(test)
#'
#' summary(test)
#' print(test)
#' plot(test)
#'
#' \dontrun{
#' A <- SimulateGMMData(n = 100, rtheta(d = 2, method = "EqualSpherical"))
#' plot(A$z, col = A$K, pch = A$K, asp = 1)
#' B <- SimulateGMMData(n = 100, rtheta(d = 2, method = "UnequalSpherical"))
#' plot(B$z, col = B$K, pch = B$K, asp = 1)
#' C <- SimulateGMMData(n = 100, rtheta(d = 2, method = "EqualEllipsoidal"))
#' plot(C$z, col = C$K, pch = C$K, asp = 1)
#' D <- SimulateGMMData(n = 100, rtheta(d = 2, method = "UnequalEllipsoidal"))
#' plot(D$z, col = D$K, pch = D$K, asp = 1)}
#' @export
rtheta <- function(m = 3, d = 2,
                   method = c("old",
                              "EqualSpherical",  "UnequalSpherical",
                              "EqualEllipsoidal","UnequalEllipsoidal")) {
  method <- match.arg(method)
  # n uniform points on d-hypersphere
  spherePointPicking <- function(n, d) {
    x <- matrix(rnorm(n*d), n, d)
    return(x/sqrt(rowSums(x^2)))
  }
  # Random (uniformly) oriented covariance matrix from eigenvalues
  lambdaToCovariance <- function(lambda, d) {
    basis <- t(spherePointPicking(n = d, d = d))
    decomp <- qr(basis, tol = 1e-17)  # QR decomposition
    Q <- qr.Q(decomp)
    return(crossprod(Q * sqrt(lambda)))
  }
  # Old function to get p.d. matrix
  getPosDefMatOld <- function(d) {
    lambda <- abs(rnorm(d, mean = 0, sd = 8))
    return(lambdaToCovariance(lambda, d))
  }
  # Get sigmas dependent on method
  getSigma <- function(d, method) {
    if (method == "EqualSpherical") {
      lambda <- rep(1, d)
      sigma <- replicate(m, diag(d), simplify = FALSE)
    } else if (method == "UnequalSpherical") {
      sigma <- lapply(seq_len(m), function(m) m*diag(d))
    } else if (method == "EqualEllipsoidal") {
      lambda <- seq(10, 1, length.out = d)^2
      real <- lambdaToCovariance(lambda, d)  # Note, the same is used all
      sigma <- replicate(m, real, simplify = FALSE)
    } else if (method == "UnequalEllipsoidal") {
      lambda <- seq(10, 1, length.out = d)^2
      sigma <- replicate(m, lambdaToCovariance(lambda, d), simplify = FALSE)
    }
    return(sigma)
  }

  if (method == "old") {
    pie   <- runif(m)
    pie   <- pie/sum(pie)
    mu    <- replicate(m, rnorm(d, sd = 10), simplify = FALSE)
    sigma <- replicate(m, getPosDefMatOld(d), simplify = FALSE)
  } else {
    pie <- rchisq(m, df = 3*m)
    pie <- pie/sum(pie)
    mu    <- replicate(m, rnorm(d, sd = 10), simplify = FALSE)
    sigma <- getSigma(d, method)
  }
  names(pie)   <- paste("pie", seq_len(m), sep = "")
  names(sigma) <- names(mu) <- paste("comp", seq_len(m), sep = "")
  out <- structure(list(m = m, d = d, pie = pie, mu = mu, sigma = sigma),
                   class = "theta")
  return(out)
}