rccm: Implemention of Random Covariance Models

Documented in adj aic dwishart randCalc rccmLogLike zToA

#' Probability Density Function for Wishart Distribution
#'
#' This function provides the probability density function for
#' the Wishart distribution.
#' @param x \eqn{p} x \eqn{p} positive definite matrix.
#' @param M \eqn{p} x \eqn{p} mean matrix. Note that \eqn{M = nu*V} where \eqn{V} is the scale matrix.
#' @param nu Degrees of freedom.
#' @param logged Logical. If TRUE, probability given on log scale.
#' @return The probability density function value.
#'
#' @export
dwishart <- function(x, M, nu, logged = FALSE) {
  x <- (x + t(x)) / 2
  M <- (M + t(M)) / 2
  p <- nrow(x)
  lnumr <- (nu - p - 1) / 2 * log(det(x)) - nu / 2 * sum(diag(solve(M) * x))
  ldenom <- (nu * p / 2) * log(2) + (nu / 2) * log(det(1 / nu * M)) + (p * (p - 1) / 4) * log(pi) + sum(sapply(1:p,
                                                                                         FUN = function(j) {
                                                                                           lgamma(nu / 2 + (1 - j) / 2)}))
  if (logged) {
    return(lnumr - ldenom)
  } else {
    return(exp(lnumr - ldenom))
  }
}

#' Adjacency Matrix
#'
#' This function calculates an adjacency matrix for the
#' matrix \eqn{mat} based on the absolute value threshold of \eqn{thresh}.
#' @param mat Numeric matrix.
#' @param thresh Threshold for absolute value of entries.
#' @return An adjacency matrix containing 1's and 0's.
#'
#' @export
adj <- function(mat, thresh = 0.001) {
  return((abs(mat) > thresh) + 0)
}

#' Z to Adjacency Matrix
#'
#' This function calculates an adjacency matrix based on
#' an integer vector of cluster memberships.
#' @param z Integer vector of cluster memberships.
#' @return An adjacency matrix containing 1's and 0's.
#'
#' @examples
#' # Calculate adjacency matrix for clustering
#' zToA(c(rep(1, 2), rep(2, 2), rep(3, 2)))
#'
#' @export
zToA <- function(z) {
  K <- length(z)
  A <- matrix(0, nrow = K, ncol = K)
  for (r in 1:K) {
    for (s in 1:K) {
      A[r, s] <- ifelse(z[r] != 0 & z[s] != 0, as.integer(z[r] == z[s]), 0)
    }
  }
  return(A)
}

#' Rand Index
#'
#' This function calculates the rand index describing
#' the amount of agreement between two integer vectors
#'  of cluster memberships.
#' @param x First integer vector of cluster memberships.
#' @param y Second integer vector of cluster memberships.
#' @return The rand index value, bounded between 0 and 1.
#'
#' @export
randCalc <- function(x, y) {
  Ahat <- zToA(x)[lower.tri(zToA(x), diag = FALSE)]
A0 <- zToA(y)[lower.tri(zToA(y), diag = FALSE)]
return((sum((Ahat - A0) == 2) + sum((Ahat - A0) == 0)) / choose(n = length(x), 2))
}

#' AIC
#'
#' This function calculates the AIC value
#' for the random covariance clustering model (RCCM)
#' @param omegaks \eqn{p} x \eqn{p} x \eqn{K} array of \eqn{K} number of estimated subject-level precision matrices.
#' @param omega0s \eqn{p} x \eqn{p} x \eqn{nclusts} array of \eqn{nclusts} number of estimated cluster-level precision matrices.
#' @param ws \eqn{nclusts} x \eqn{K} matrix of estimated cluster weights for each subject (weights).
#' @param x List of \eqn{K} data matrices each of dimension \eqn{n_k} x \eqn{p}.
#' @param lambda2 Non-negative scalar value used as input to rccm function to obtain estimates.
#' @return Numeric AIC value.
#'
#' @examples
#' # Generate data
#' set.seed(1994)
#' myData <- rccSim(G = 2, clustSize = 10, p = 10, n = 100, overlap = 0.50, rho = 0.10)
#'
#' # Analyze with RCCM
#' resultRccm <- rccm(x = myData$simDat, lambda1 = 20,
#' lambda2 = 325, lambda3 = 0.01, nclusts = 2)
#'
#' # Calculate AIC
#' aic(omegaks = resultRccm$Omegas, omega0s = resultRccm$Omega0,
#' ws = resultRccm$weights, x = myData$simDat, lambda2 = 325)
#'
#' @export
aic <- function(omegaks, omega0s, ws, x, lambda2) {

  K <- dim(omegaks)[3]
  G <- dim(omega0s)[3]

  dfks <- sapply(X = 1:K, FUN = function(k) {
    sum(rccm::adj(omegaks[, , k])[lower.tri(omegaks[, , k])])
  })

  dfgs <- sapply(X = 1:G, FUN = function(g) {
    sum(rccm::adj(omega0s[, , g])[lower.tri(omega0s[, , g])])
  })

  modelDim <- sum(c(dfks, dfgs))
  mll <- rccmLogLike(omegaks = omegaks, omega0s = omega0s, ws = ws, x = x, lambda2 = lambda2)

  aic <- 2*modelDim - 2*mll
  return(aic)
}

#' Model Log-Likelihood
#'
#' This function calculates the model log-likelihood
#' for the random covariance clustering model (RCCM)
#' @param omegaks \eqn{p} x \eqn{p} x \eqn{K} array of \eqn{K} number of estimated subject-level precision matrices.
#' @param omega0s \eqn{p} x \eqn{p} x \eqn{nclusts} array of \eqn{nclusts} number of estimated cluster-level precision matrices.
#' @param ws \eqn{nclusts} x \eqn{K} matrix of estimated cluster weights for each subject (weights).
#' @param x List of \eqn{K} data matrices each of dimension \eqn{n_k} x \eqn{p}.
#' @param lambda2 Non-negative scalar value used as input to rccm function to obtain estimates.
#' @return Numeric AIC value.
#'
#' @examples
#' # Generate data
#' set.seed(1994)
#' myData <- rccSim(G = 2, clustSize = 10, p = 10, n = 100, overlap = 0.50, rho = 0.10)
#'
#' # Analyze with RCCM
#' resultRccm <- rccm(x = myData$simDat, lambda1 = 20,
#' lambda2 = 325, lambda3 = 0.01, nclusts = 2)
#'
#' # Calculate AIC
#' rccmLogLike(omegaks = resultRccm$Omegas, omega0s = resultRccm$Omega0,
#' ws = resultRccm$weights, x = myData$simDat, lambda2 = 325)
#'
#' @export
rccmLogLike <- function(omegaks, omega0s, ws, x, lambda2) {
  G <- dim(omega0s)[3]
  K <- dim(omegaks)[3]

  mll <- 0

  for(k in 1:K) {
    lk1 <- sum(mvtnorm::dmvnorm(x[[k]], sigma = solve(omegaks[, , k]), log = T))

    if(any(ws[, k] == 1)) {
      lk2 <- rccm::dwishart(omegaks[, , k], M = omega0s[, , which(ws[, k] == 1)], logged = T, nu = lambda2)

      mll <- mll + lk1 + lk2
    } else {
    lk2 <- log(sum(sapply(1:G, FUN = function(g) {
      ws[g, k] * rccm::dwishart(omegaks[, , k], M = omega0s[, , g], logged = F, nu = lambda2)})))

    mll <- mll + lk1 + lk2
    }
  }

  return(mll)
}