R/project_matrix.R

Defines functions get_single_from_double_indices get_double_from_single_indices get_equal_indices_by_perm project_matrix

Documented in project_matrix

#' Project matrix after optimization
#'
#' After the MAP permutation was found with [find_MAP()],
#' use this permutation to approximate the covariance matrix
#' with bigger statistical confidence.
#'
#' Project matrix on the space of symmetrical matrices invariant
#' by a cyclic group generated by `perm`.
#' This implements the formal
#' [Definition 3 from references](https://arxiv.org/abs/2004.03503).
#'
#' When `S` is the sample covariance matrix (output of `cov()` function, see
#' examples), then `S` is the **unbiased estimator** of the covariance matrix.
#' However, the **maximum likelihood estimator** of the covariance matrix is
#' `S*(n-1)/(n)`, unless n < p, when the
#' **maximum likelihood estimator does not exist**. For more information, see
#' [Wikipedia - Estimation of covariance matrices](https://en.wikipedia.org/wiki/Estimation_of_covariance_matrices).
#'
#' The maximum likelihood estimator differs when one knows the covariance
#' matrix is **invariant under some permutation**. This estimator will
#' be symmetric AND have some values repeated (see examples and
#' [Corollary 12 from references](https://arxiv.org/abs/2004.03503)).
#'
#' The estimator will be invariant under the given permutation. Also, it
#' will **need fewer observations** for the maximum likelihood estimator to
#' exist (see **Project Matrix - Equation (6)** section in
#' `vignette("Theory", package = "gips")` or in its
#' [pkgdown page](https://przechoj.github.io/gips/articles/Theory.html)).
#' For some permutations, even \eqn{n = 2} could be enough.
#' The minimal number of observations needed are named `n0` and
#' can be calculated by `summary.gips()`.
#'
#' For more details, see the **Project Matrix - Equation (6)**
#' section in `vignette("Theory", package = "gips")` or in its
#' [pkgdown page](https://przechoj.github.io/gips/articles/Theory.html).
#'
#' @param S A square matrix to be projected.
#'     The empirical covariance matrix.
#'     (See the `S` parameter in the `gips()` function).
#'     When it is not positive semi-definite,
#'     it shows a warning of a class `not_positive_semi_definite_matrix`.
#' @param perm A permutation to be projected on.
#'     An object of a `gips` class,
#'     a `gips_perm` class, or anything that can be used
#'     as the `x` argument in the `gips_perm()` function.
#' @param precomputed_equal_indices This parameter is for internal use only.
#'
#' @returns Returns the matrix `S` projected on the space of symmetrical matrices invariant
#' by a cyclic group generated by `perm`. See Details for more.
#' @export
#'
#' @seealso
#' * [Wikipedia - Estimation of covariance matrices](https://en.wikipedia.org/wiki/Estimation_of_covariance_matrices)
#' * **Project Matrix - Equation (6)** section of
#'     `vignette("Theory", package = "gips")` or its
#'     [pkgdown page](https://przechoj.github.io/gips/articles/Theory.html) -
#'     A place to learn more about the math
#'     behind the `gips` package and see
#'     more examples of `project_matrix()`.
#' * [find_MAP()] - The function that finds
#'     the Maximum A Posteriori (MAP) Estimator
#'     for a given `gips` object.
#'     After the MAP Estimator is found, the matrix
#'     `S` can be projected on this permutation,
#'     creating the MAP Estimator of the covariance matrix
#'     (see examples).
#' * [gips_perm()] - Constructor for the `perm` parameter.
#' * [plot.gips()] - For `plot(g, type = "MLE")`,
#'     the `project_matrix()` is called (see examples).
#' * [summary.gips()] - Can calculate the `n0`, the minimal
#'     number of observations, so that the projected matrix
#'     will be the MLE estimator of the covariance matrix.
#'
#' @examples
#' p <- 6
#' my_perm <- "(14)(23)" # permutation (1,4)(2,3)(5)(6)
#' number_of_observations <- 10
#' X <- matrix(rnorm(p * number_of_observations), number_of_observations, p)
#' S <- cov(X)
#' projected_S <- project_matrix(S, perm = my_perm)
#' projected_S
#' # The value in [1,1] is the same as in [4,4]; also, [2,2] and [3,3];
#'   # also [1,2] and [3,4]; also, [1,5] and [4,5]; and so on
#'
#' # Plot the projected matrix:
#' g <- gips(S, number_of_observations, perm = my_perm)
#' plot(g, type = "MLE")
#'
#' # Find the MAP Estimator of covariance
#' g_MAP <- find_MAP(g, max_iter = 10, show_progress_bar = FALSE, optimizer = "Metropolis_Hastings")
#' S_MAP <- project_matrix(attr(g, "S"), perm = g_MAP)
#' S_MAP
#' plot(g_MAP, type = "heatmap")
project_matrix <- function(S, perm, precomputed_equal_indices = NULL) {
  if (!is.matrix(S)) {
    rlang::abort(c("There was a problem identified with the provided arguments:",
      "i" = "`S` must be a matrix.",
      "x" = paste0(
        "You provided `S` with `class(S) == (",
        paste(class(S), collapse = ", "),
        ")`."
      )
    ))
  }
  if (nrow(S) != ncol(S)) {
    rlang::abort(c("There was a problem identified with the provided arguments:",
      "i" = "`S` must be a square matrix.",
      "x" = paste0(
        "You provided `S` as a matrix, but with different sizes: ",
        nrow(S), " and ", ncol(S), "."
      )
    ))
  }
  if (!is.positive.semi.definite.matrix(S)) {
    rlang::warn(c(
      "i" = "`project_matrix()` is designed for positive semi-definite matrices",
      "x" = "You provided `S` that is not positive semi-definite matrix",
      "*" = "`gips` can still project this matrix on the provided permutation",
      "i" = "Did You provide the wrong `S` matrix?"
    ), class = "not_positive_semi_definite_matrix")
  }

  if (is.null(precomputed_equal_indices)) {
    perm_size <- ncol(S)
    if (!inherits(perm, "gips_perm")) {
      perm <- gips_perm(perm, perm_size)
    } else if (attr(perm, "size") != ncol(S)) {
      rlang::abort(c("There was a problem identified with the provided arguments:",
        "i" = "Size of `perm` must be equal to number of columns and rows of `S`.",
        "x" = paste0(
          "You provided `perm` with `size == ",
          attr(perm, "size"),
          "`, but the `S` matrix has ",
          ncol(S), " columns."
        )
      ))
    }
    equal_indices_by_perm <- get_equal_indices_by_perm(perm)
  } else {
    equal_indices_by_perm <- precomputed_equal_indices
  }
  mean_values <- sapply(equal_indices_by_perm, function(indices) {
    mean(S[indices])
  })
  means <- rep(mean_values, sapply(equal_indices_by_perm, length))
  projected_matrix <- matrix(nrow = nrow(S), ncol = ncol(S))
  projected_matrix[unlist(equal_indices_by_perm)] <- means

  colnames(projected_matrix) <- colnames(S)
  rownames(projected_matrix) <- rownames(S)

  projected_matrix
}

#' Get indices of elements of perm_size x perm_size matrix, which should be equal
#'
#' @param perm An object of a `gips_perm` class.
#'
#' @returns A list of integer vectors. Each vector contains a SINGLE index
#' of elements, which should be equal in symmetrical matrix invariant
#' by permutation `perm`.
#'
#' @examples
#' perm <- gips_perm("(1,2,3)(4,5)", 6)
#' matrix_symvariant <- matrix(c(
#'   2, 1, 1, 3, 3, 4,
#'   1, 2, 1, 3, 3, 4,
#'   1, 1, 2, 3, 3, 4,
#'   3, 3, 3, 5, 6, 7,
#'   3, 3, 3, 6, 5, 7,
#'   4, 4, 4, 7, 7, 8
#' ), byrow = TRUE, ncol = 6)
#' out <- get_equal_indices_by_perm(perm, 6)
#' all(sapply(out, function(v) all.equal(matrix_symvariant[v]))) # TRUE
#' @noRd
get_equal_indices_by_perm <- function(perm) {
  perm_size <- attr(perm, "size")
  # We'll be iterating over pairs of subcycles
  subcycle_indice_pairs <- matrix(
    c(
      rep(1:length(perm), each = length(perm)),
      rep(1:length(perm), times = length(perm))
    ),
    ncol = 2
  )

  subcycle_indice_pairs <- subcycle_indice_pairs[subcycle_indice_pairs[, 1] <= subcycle_indice_pairs[, 2], ,
    drop = FALSE
  ]
  # subcycle_indice_pairs is a matrix of pairs like (5,6), where the second is not smaller than the first

  # Let's go
  nested_list <- lapply(1:nrow(subcycle_indice_pairs), function(pair_index) {
    i <- subcycle_indice_pairs[pair_index, 1]
    j <- subcycle_indice_pairs[pair_index, 2]
    subcycle_1 <- perm[[i]]
    subcycle_2 <- perm[[j]]

    # matrix_subcycle is a subcycle of permutation P defined as
    # P(k,l) = (perm(k), perm(l))
    matrix_subcycle_length <- numbers::LCM(
      length(subcycle_1),
      length(subcycle_2)
    )
    number_of_matrix_subcycles <- length(subcycle_1) * length(subcycle_2) /
      matrix_subcycle_length

    # Instead of operating on subcycle elements, we will be operating
    # on elements' indices
    # I.e. for subcycle s=[3,5,2] we operate on indices [1,2,3]
    # s[1] = 3 etc
    elements_indices <- 1:matrix_subcycle_length
    subcycle_1_indices <- elements_indices %% length(subcycle_1)
    subcycle_1_indices[subcycle_1_indices == 0] <- length(subcycle_1)
    subcycle_1_elements <- subcycle_1[subcycle_1_indices]

    subcycle_2_indices <- elements_indices %% length(subcycle_2)
    subcycle_2_indices[subcycle_2_indices == 0] <- length(subcycle_2)

    lapply(1:(number_of_matrix_subcycles), function(k) {
      subcycle_2_elements <- subcycle_2[shift_vector(
        subcycle_2_indices,
        k - 1
      )]

      double_indices <- matrix(c(
        subcycle_1_elements, subcycle_2_elements,
        subcycle_2_elements, subcycle_1_elements
      ), ncol = 2)

      single_indices <- get_single_from_double_indices(
        double_indices,
        perm_size
      )

      # We need to correct for matrix subcycles, that are symmetric
      if (i == j &&
        single_indices[matrix_subcycle_length + 1] %in%
          single_indices[1:matrix_subcycle_length]) {
        single_indices <- single_indices[1:matrix_subcycle_length]
      }
      shift_vector(single_indices, which.min(single_indices) - 1)
    })
  })
  unlist(nested_list, recursive = FALSE)
}

#' Indices utils
#'
#' Elements of `matrix` can be accessed by double indices `M[i,j]`
#' or, when treating matrix as a long vector, single indices `M[k]`.
#' These functions allow to switch from one kind of indices to another.
#'
#' @noRd
get_double_from_single_indices <- function(indices, matrix_size) {
  row_indices <- indices %% matrix_size
  row_indices[row_indices == 0] <- matrix_size
  matrix(c(
    row_indices,
    ceiling(indices / matrix_size)
  ), ncol = 2)
}

get_single_from_double_indices <- function(indices, matrix_size) {
  (indices[, 2] - 1) * matrix_size + indices[, 1]
}
PrzeChoj/gips documentation built on June 12, 2025, 12:23 a.m.