R/RcppExports.R
In robustmatrix: Robust Matrix-Variate Parameter Estimation

Documented in cstep mmcd mmle

# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

MLErow <- function(X_std, cov_col_inv, diag = FALSE, nthreads = 1L) {
    .Call(`_robustmatrix_MLErow`, X_std, cov_col_inv, diag, nthreads)
}

MLEcol <- function(X_std, cov_row_inv, diag = FALSE, nthreads = 1L) {
    .Call(`_robustmatrix_MLEcol`, X_std, cov_row_inv, diag, nthreads)
}

KroneckerNorm <- function(A, B, C, D, diag_row = FALSE, diag_col = FALSE) {
    .Call(`_robustmatrix_KroneckerNorm`, A, B, C, D, diag_row, diag_col)
}

#' Maximum Likelihood Estimation for Matrix Normal Distribtuion
#'
#' \code{mmle} computes the Maximum Likelihood Estimators (MLEs) for the matrix normal distribution
#' using the iterative flip-flop algorithm \insertCite{Dutilleul1999}{robustmatrix}.
#'
#' @param X a 3d array of dimension \eqn{(p,q,n)}, containing \eqn{n} matrix-variate samples of \eqn{p} rows and \eqn{q} columns in each slice.
#' @param cov_row_init matrix. Initial \code{cov_row} \eqn{p \times p} matrix. Identity by default.
#' @param cov_col_init matrix. Initial \code{cov_col} \eqn{p \times p} matrix. Identity by default.
#' @param diag Character. If "none" (default) all entries of \code{cov_row} and \code{cov_col} are estimated. If either "both", "row", or "col" only the diagonal entries of the respective matrices are estimated.
#' @param max_iter upper limit of iterations.
#' @param lambda a smooting parameter for the rowwise and columnwise covariance matrices.
#' @param silent Logical. If FALSE (default) warnings and errors are printed.
#' @param nthreads Integer. If 1 (default), all computations are carried out sequentially.
#' If larger then 1, matrix multiplication in the flip-flop algorithm is carried out in parallel using \code{nthreads} threads. Be aware of the overhead of parallelization for small matrices and or small sample sizes.
#' If < 0, all possible threads are used.
#'
#' @return A list containing the following:
#' \item{\code{mu}}{Estimated \eqn{p \times q} mean matrix.}
#' \item{\code{cov_row}}{Estimated \eqn{p} times \eqn{p} rowwise covariance matrix.}
#' \item{\code{cov_col}}{Estimated \eqn{q} times \eqn{q} columnwise covariance matrix.}
#' \item{\code{cov_row_inv}}{Inverse of \code{cov_row}.}
#' \item{\code{cov_col_inv}}{Inverse of \code{cov_col}.}
#' \item{\code{norm}}{Frobenius norm of squared differences between covariance matrices in final iteration.}
#' \item{\code{iterations}}{Number of iterations of the mmle procedure.}
#'
#' @references
#' \insertAllCited{}
#'
#' @seealso For robust parameter estimation use \code{\link{mmcd}}.
#'
#' @export
#'
#' @examples
#' n = 1000; p = 2; q = 3
#' mu = matrix(rep(0, p*q), nrow = p, ncol = q)
#' cov_row = matrix(c(1,0.5,0.5,1), nrow = p, ncol = p)
#' cov_col = matrix(c(3,2,1,2,3,2,1,2,3), nrow = q, ncol = q)
#' X <- rmatnorm(n = 1000, mu, cov_row, cov_col)
#' par_mmle <- mmle(X)
mmle <- function(X, cov_row_init = NULL, cov_col_init = NULL, diag = "none", max_iter = 100L, lambda = 0, silent = FALSE, nthreads = 1L) {
    .Call(`_robustmatrix_mmle`, X, cov_row_init, cov_col_init, diag, max_iter, lambda, silent, nthreads)
}

MMD <- function(X, mu, cov_row, cov_col, inverted = FALSE) {
    .Call(`_robustmatrix_MMD`, X, mu, cov_row, cov_col, inverted)
}

TensorMMD <- function(X, mu, cov_row, cov_col, inverted = FALSE, nthreads = 1L) {
    .Call(`_robustmatrix_TensorMMD`, X, mu, cov_row, cov_col, inverted, nthreads)
}

#' C-step of Matrix Minimum Covariance Determinant (MMCD) Estimator
#'
#' This function is part of the FastMMCD algorithm \insertCite{mayrhofer2024}{robustmatrix}.
#'
#' @param h_init Integer. Size of initial h-subset. If smaller than 0 (default) size is chosen automatically.
#' @param init Logical. If TRUE (default) elemental subsets are used to initialize the procedure.
#' @param max_iter upper limit of C-step iterations (default is 100)
#' @inheritParams mmcd
#'
#' @return A list containing the following:
#' \item{\code{mu}}{Estimated \eqn{p \times q} mean matrix.}
#' \item{\code{cov_row}}{Estimated \eqn{p} times \eqn{p} rowwise covariance matrix.}
#' \item{\code{cov_col}}{Estimated \eqn{q} times \eqn{q} columnwise covariance matrix.}
#' \item{\code{cov_row_inv}}{Inverse of \code{cov_row}.}
#' \item{\code{cov_col_inv}}{Inverse of \code{cov_col}.}
#' \item{\code{md}}{Squared Mahalanobis distances.}
#' \item{\code{md_raw}}{Squared Mahalanobis distances based on \emph{raw} MMCD estimators.}
#' \item{\code{det}}{Value of objective function (determinant of Kronecker product of rowwise and columnwise covariane).}
#' \item{\code{dets}}{Objective values for the final h-subsets.}
#' \item{\code{h_subset}}{Final h-subset of \emph{raw} MMCD estimators.}
#' \item{\code{iterations}}{Number of C-steps.}
#'
#' @seealso \code{\link{mmcd}}
#'
#' @export
#'
#' @examples
#' n = 1000; p = 2; q = 3
#' mu = matrix(rep(0, p*q), nrow = p, ncol = q)
#' cov_row = matrix(c(1,0.5,0.5,1), nrow = p, ncol = p)
#' cov_col = matrix(c(3,2,1,2,3,2,1,2,3), nrow = q, ncol = q)
#' X <- rmatnorm(n = 1000, mu, cov_row, cov_col)
#' ind <- sample(1:n, 0.3*n)
#' X[,,ind] <- rmatnorm(n = length(ind), matrix(rep(10, p*q), nrow = p, ncol = q), cov_row, cov_col)
#' par_mmle <- mmle(X)
#' par_cstep <- cstep(X)
#' distances_mmle <- mmd(X, par_mmle$mu, par_mmle$cov_row, par_mmle$cov_col)
#' distances_cstep <- mmd(X, par_cstep$mu, par_cstep$cov_row, par_cstep$cov_col)
#' plot(distances_mmle, distances_cstep)
#' abline(h = qchisq(0.99, p*q), lty = 2, col = "red")
#' abline(v = qchisq(0.99, p*q), lty = 2, col = "red")
cstep <- function(X, alpha = 0.5, cov_row_init = NULL, cov_col_init = NULL, diag = "none", h_init = -1L, init = TRUE, max_iter = 100L, max_iter_MLE = 100L, lambda = 0, adapt_alpha = TRUE) {
    .Call(`_robustmatrix_cstep`, X, alpha, cov_row_init, cov_col_init, diag, h_init, init, max_iter, max_iter_MLE, lambda, adapt_alpha)
}

#' The Matrix Minimum Covariance Determinant (MMCD) Estimator
#'
#' \code{mmcd} computes the robust MMCD estimators of location and covariance for matrix-variate data
#' using the FastMMCD algorithm \insertCite{mayrhofer2024}{robustmatrix}.
#'
#' @param nsamp number of initial h-subsets (default is 500).
#' @param alpha numeric parameter between 0.5 (default) and 1. Controls the size \eqn{h \approx alpha * n} of the h-subset over which the determinant is minimized.
#' @param max_iter_cstep upper limit of C-step iterations (default is 100)
#' @param max_iter_MLE upper limit of MLE iterations (default is 100)
#' @param max_iter_cstep_init upper limit of C-step iterations for initial h-subsets (default is 2)
#' @param max_iter_MLE_init upper limit of MLE iterations for initial h-subsets (default is 2)
#' @param nmini for \eqn{n > 2*nmini} the algorithm splits the data into smaller subsets with at least \code{nmini} samples for initialization (default is 300)
#' @param nsub_pop for \eqn{n > 2*nmini} a subpopulation of \eqn{\min(n,nsub\_pop)} samples is divided into \eqn{k = floor(\min(n,nsub\_pop)/nmini)} subsets  for initialization (default is 1500)
#' @param adapt_alpha Logical. If TRUE (default) alpha is adapted to take the dimension of the data into account.
#' @param reweight Logical. If TRUE (default) the reweighted MMCD estimators are computed.
#' @param scale_consistency Character. Either "quant" (default) or "mmd_med". If "quant", the consistency factor is chosen to achieve consistency under the matrix normal distribution.
#' If "mmd_med", the consistency factor is chosen based on the Mahalanobis distances of the observations.
#' @param outlier_quant numeric parameter between 0 and 1. Chi-square quantile used in the reweighting step.
#' @param nthreads Integer. If 1 (default), all computations are carried out sequentially.
#' If larger then 1, C-steps are carried out in parallel using \code{nthreads} threads.
#' If < 0, all possible threads are used.
#' @inheritParams mmle
#'
#' @return A list containing the following:
#' \item{\code{mu}}{Estimated \eqn{p \times q} mean matrix.}
#' \item{\code{cov_row}}{Estimated \eqn{p} times \eqn{p} rowwise covariance matrix.}
#' \item{\code{cov_col}}{Estimated \eqn{q} times \eqn{q} columnwise covariance matrix.}
#' \item{\code{cov_row_inv}}{Inverse of \code{cov_row}.}
#' \item{\code{cov_col_inv}}{Inverse of \code{cov_col}.}
#' \item{\code{md}}{Squared Mahalanobis distances.}
#' \item{\code{md_raw}}{Squared Mahalanobis distances based on \emph{raw} MMCD estimators.}
#' \item{\code{det}}{Value of objective function (determinant of Kronecker product of rowwise and columnwise covariane).}
#' \item{\code{alpha}}{The (adjusted) value of alpha used to determine the size of the h-subset.}
#' \item{\code{consistency_factors}}{Consistency factors for raw and reweighted MMCD estimators.}
#' \item{\code{dets}}{Objective values for the final h-subsets.}
#' \item{\code{best_i}}{ID of subset with best objective.}
#' \item{\code{h_subset}}{Final h-subset of \emph{raw} MMCD estimators.}
#' \item{\code{h_subset_reweighted}}{Final h-subset of \emph{reweighted} MMCD estimators.}
#' \item{\code{iterations}}{Number of C-steps.}
#' \item{\code{dets_init_first}}{Objective values for the \code{nsamp} initial h-subsets after \code{max_iter_cstep_init} C-steps.}
#' \item{\code{subsets_first}}{Subsets created in subsampling procedure for large \code{n}.}
#' \item{\code{dets_init_second}}{Objective values of the 10 best initial subsets after executing C-steps until convergence.}
#'
#' @details The MMCD estimators generalize the well-known Minimum Covariance Determinant (MCD)
#' \insertCite{Rousseeuw1985,Rousseeuw1999}{robustmatrix} to the matrix-variate setting.
#' It looks for the \eqn{h} observations, \eqn{h = \alpha * n}, whose covariance matrix has the smallest determinant.
#' The FastMMCD algorithm is used for computation and is described in detail in \insertCite{mayrhofer2024}{robustmatrix}.
#' NOTE: The procedure depends on \emph{random} initial subsets. Currently setting a seed is only possible if \code{nthreads = 1}.
#'
#' @references
#' \insertAllCited{}
#'
#' @seealso The \code{mmcd} algorithm uses the \code{\link{cstep}} and \code{\link{mmle}} functions.
#'
#' @export
#'
#' @examples
#' n = 1000; p = 2; q = 3
#' mu = matrix(rep(0, p*q), nrow = p, ncol = q)
#' cov_row = matrix(c(1,0.5,0.5,1), nrow = p, ncol = p)
#' cov_col = matrix(c(3,2,1,2,3,2,1,2,3), nrow = q, ncol = q)
#' X <- rmatnorm(n = n, mu, cov_row, cov_col)
#' ind <- sample(1:n, 0.3*n)
#' X[,,ind] <- rmatnorm(n = length(ind), matrix(rep(10, p*q), nrow = p, ncol = q), cov_row, cov_col)
#' par_mmle <- mmle(X)
#' par_mmcd <- mmcd(X)
#' distances_mmle <- mmd(X, par_mmle$mu, par_mmle$cov_row, par_mmle$cov_col)
#' distances_mmcd <- mmd(X, par_mmcd$mu, par_mmcd$cov_row, par_mmcd$cov_col)
#' plot(distances_mmle, distances_mmcd)
#' abline(h = qchisq(0.99, p*q), lty = 2, col = "red")
#' abline(v = qchisq(0.99, p*q), lty = 2, col = "red")
mmcd <- function(X, nsamp = 500L, alpha = 0.5, cov_row_init = NULL, cov_col_init = NULL, diag = "none", lambda = 0, max_iter_cstep = 100L, max_iter_MLE = 100L, max_iter_cstep_init = 2L, max_iter_MLE_init = 2L, nmini = 300L, nsub_pop = 1500L, adapt_alpha = TRUE, reweight = TRUE, scale_consistency = "quant", outlier_quant = 0.975, nthreads = 1L) {
    .Call(`_robustmatrix_mmcd`, X, nsamp, alpha, cov_row_init, cov_col_init, diag, lambda, max_iter_cstep, max_iter_MLE, max_iter_cstep_init, max_iter_MLE_init, nmini, nsub_pop, adapt_alpha, reweight, scale_consistency, outlier_quant, nthreads)
}