R/dimension.R

Defines functions plot.dimension print.dimension dimension dimension.subspace dimension.Matrix dimension.matrix dimension.default dimension

Documented in dimension plot.dimension print.dimension

#' @title Signal subspace dimension estimation in high-dimensional matrix
#'
#' @description Estimate the dimension of a signal-rich subspace in large,
#'  high-dimensional data.
#'
#' @param x A subspace class or a numeric real-valued matrix with n
#'  number of samples and p number of features. If p > n, a warning
#'  message is generated and the transpose of x is used.
#' @param components A series of right singular vectors to estimate.
#'  Components must be smaller or equal to min(nrow(x),ncol(x)).
#' @param decomposition The method to be used; method = "svd"
#'  returns results from singular value decomposition; method = "eigen"
#'  returns results from eigenvalue decomposition.
#' @param method The method to be used; method = "double_posterior"
#'  returns results from function estimate_rank_double_posterior;
#'   method = "posterior" returns results from function estimate_rank_posterior;
#'   method = "kmeans" returns results from function estimate_rank_kmeans;
#'   method = "ladle" returns results from function estimate_rank_ladle.
#'   Default uses estimate_rank_double_posterior.
#' @param num_est_samples Split data into num_est_samples-fold
#'  for parallel computation.
#' @param verbose output message
#' @param ... Extra parameters
#' @return
#' Returns a list with entries:
#' \describe{
#'   \item{ndf:}{ The number of degrees of freedom of x.}
#'   \item{pdim:}{ The number of dimensions of x.}
#'   \item{components:}{ A series of right singular
#'    vectors estimated.}
#'   \item{var_correct:}{ Corrected population variance
#'    for Marchenko-Pastur distribution.}
#'   \item{transpose_flag:}{ A logical value indicating
#'    whether the matrix x is transposed.}
#'   \item{irl:}{ A data frame of scaled eigenvalues for
#'    specified rank and corresponding dimensions.}
#'   \item{mp_irl:}{ A data frame of sampled expected eigenvalues from
#'    Marchenko-Pastur for specified rank and corresponding dimensions.}
#'   \item{v:}{ Right singular vectors of x matrix for specified rank.}
#'   \item{u:}{ Left singular vectors of x matrix or specified rank.}
#'   \item{dimension:}{ Estimated signal subspace dimension.}
#'   \item{bcp_irl:}{ Probability of change in mean and posterior means
#'    of eigenvalue difference between $x$ and $N$.}
#' }
#' @section Details:
#'  We estimate the intrinsic dimension of a signal-rich subspace
#'  in large high-dimensional data by decomposing matrix into a
#'  signal-plus-noise space and approximate the signal-rich subspace
#'  with a rank K approximation \eqn{\hat{x}=\sum_{k=1}^{K}d_ku_k{v_k}^T}.
#'  To estimate rank K, we propose a simple procedure assuming that matrix
#'  x is composed of a low-rank signal matrix S and an average general noise
#'  random matrix \eqn{\bar{N}}. It has been shown that the average eigenvalues
#'  of random matrices N follows a universal Marchenko-Pastur (MP) distribution.
#'  We hypothesize that the deviation of eigenvalues of x from the MP
#'  distribution indicates the intrinsic dimension of signal-rich subspace.
#' @examples
#' x <- x_sim(n = 100, p = 150, ncc = 10, var = c(rep(10, 5), rep(1, 5)))
#' results <- dimension(x, components = 1:50)
#'
#' #equivelantly, if subsapce is calcualted
#' Subspace <- subspace(x, components = 1:50)
#' results <- dimension(s = Subspace, method = "double_posterior")
#'
#' str(results)
#' plot(results$subspace, changepoint = results$dimension,
#'      annotation = 10)
#' modified_legacyplot(results$bcp_irl, annotation = 10)
#' @seealso [RMTstat] for details of Marchenko-Pastur distribution.
#' @seealso https://dracodoc.wordpress.com/2014/07/21/
#' a-simple-algorithm-to-detect-flat-segments-in-noisy-signals/ for detection
#' of flat and spike in noisy signals
#' @importFrom bcp bcp
#' @importFrom  tibble tibble
#' @importFrom stringr str_locate
#' @importFrom dplyr %>%
#' @export

dimension <- function(x, components, decomposition, method,
                      num_est_samples, verbose, ...) {
  UseMethod("dimension", x)
}

dimension.default <- function(x, components, decomposition, method,
                              num_est_samples, verbose, ...) {
  stop("Don't know how to estimate a dimension object from a class of type: ",
       class(x))
}

dimension.matrix <- function(x,
                               components = NA,
                               decomposition = c("svd", "eigen"),
                               num_est_samples = NA,
                               method = c("double_posterior", "posterior",
                                 "kmeans", "ladle"),
                               verbose = FALSE, ...) {
  dimension(x = x, components = components, decomposition = decomposition,
            num_est_samples = num_est_samples, method = method,
            verbose = verbose, ... = ...)
}

dimension.Matrix <- function(x,
                               components = NA,
                               decomposition = c("svd", "eigen"),
                               num_est_samples = NA,
                               method = c("double_posterior", "posterior",
                                 "kmeans", "ladle"),
                               verbose = FALSE, ...) {
  dimension(x = x, components = components, decomposition = decomposition,
            num_est_samples = num_est_samples, method = method,
            verbose = verbose, ... = ...)
}

dimension.subspace <- function(x,
                               components = NA,
                               decomposition = c("svd", "eigen"),
                               num_est_samples = NA,
                               method = c("double_posterior", "posterior",
                                 "kmeans", "ladle"),
                               verbose = FALSE, ...) {
  dimension(x = x, components = components, decomposition = decomposition,
            num_est_samples = num_est_samples, method = method,
            verbose = verbose, ... = ...)
}

dimension <- function(x,
                      components = NA,
                      decomposition = c("svd", "eigen"),
                      method = c("double_posterior", "posterior",
                                 "kmeans", "ladle"),
                      num_est_samples = NA,
                      verbose = FALSE,
                      ...) {
# -----------------------
# Check input parameters
# -----------------------
  if (any(class(x) == "subspace")) {
    if (verbose) {
        message("Subspace has already been calculated.\n")
    }
    s <- x
  } else {
    if (is.null(x)) {
      stop("Invalid input x")
    } else {
      # Checking for rank input
      if (missing(components)) {
        components <- seq_len(min(nrow(x), ncol(x)))
        if (verbose) {
          message(paste0("No component specified. ",
              "Calculating full singular value decomposition instead.\n"))
        }
      }
      # Checking for times input
      if (missing(num_est_samples)) {
        num_est_samples <- 0
      }
      if (missing(decomposition)) {
        decomposition <- "svd"
      }
      # Calcualte subspace
      s <- subspace(x, components = components, decomposition = decomposition,
                    num_est_samples = num_est_samples, mp = TRUE, verbose)
    }
  }
  if (missing(method)) {
    method <- "double_posterior"
    }
  switch(method,
        default = {
          s %>% estimate_rank_double_posterior()
        },
        double_posterior = {
          s %>% estimate_rank_double_posterior()
        },
        posterior = {
          s %>% estimate_rank_posterior()
        },
        kmeans = {
          s %>% estimate_rank_kmeans()
        },
        ladle = {
          x %>% estimate_rank_ladle()
        },
        stop("Invalid method input")
        )
}


#' @title Print dimension
#'
#' A generic function.
#'
#' @param x a dimension class.
#' @param ... Extra parameters
#' @export
print.dimension <- function(x, ...) {
  message("An object of class dimension estimated for",
      ifelse(x$transpose_flag, "transposed", ""),
      " X matrix with ",
      x$subspace$ndf,
      " samples and ",
      x$subspace$pdim,
      " features.\n")
  message(x$message[[1]])
  invisible(x)
}



#' @title Scree plot of scaled eigenvalues of x and random noise matrix N
#'
#' @description This is a generic function for dimension class to plot scree
#'  plot of scaled eigenvalues of X and sampled scaled expected eigenvalues
#'   from Marcenko-Pastur distribution.
#' @param x A dimension class.
#' @param changepoint A number. Estimated changepoint in dimension function.
#' @param annotation A number. Choose to label points up to annotation number.
#'  Set to 0 with no annotation.
#' @param verbose output message
#' @param ... Extra parameters
#' @examples
#' \donttest{
#' plot(results, changepoint = 0, annotation = 15)
#' }
#' @import ggplot2
#' @importFrom tibble tibble
#' @importFrom ggrepel geom_text_repel
#' @export

plot.dimension <- function(x,
                          changepoint = NULL,
                          annotation = NULL,
                          verbose = TRUE,
                          ...) {
plot(x$subspace,
    changepoint = x$dimension,
    annotation = x$dimension,
    verbose = verbose)

}
WenlanzZ/dimension documentation built on Aug. 3, 2022, 2:29 a.m.