Nothing
#' @title Randomized CUR matrix decomposition.
#
#' @description Randomized CUR matrix decomposition.
#
#' @details
#' Algorithm for computing the CUR matrix decomposition of a rectangular \eqn{(m, n)} matrix \eqn{A},
#' with target rank \eqn{k << min(m,n)}. The input matrix is factored as
#'
#' \deqn{A = C * U * R}
#'
#' using the \code{\link{rid}} decomposition. The factor matrix \eqn{C} is formed using actual
#' columns of \eqn{A}, also called the partial column skeleton. The factor matrix \eqn{R} is formed
#' using actual rows of \eqn{A}, also called the partial row skeleton.
#'
#' If \eqn{rand=TRUE} a probabilistic strategy is used to compute the decomposition, otherwise a
#' deterministic algorithm is used.
#'
#'
#' @param A array_like; \cr
#' numeric \eqn{(m, n)} input matrix (or data frame). \cr
#' If the data contain \eqn{NA}s na.omit is applied.
#'
#' @param k integer; \cr
#' target rank of the low-rank approximation, i.e., the number of columns/rows
#' to be selected. It is required that \eqn{k} is smaller or equal to \eqn{min(m,n)}.
#'
#' @param p integer, optional; \cr
#' oversampling parameter (default \eqn{p=10}).
#'
#' @param q integer, optional; \cr
#' number of additional power iterations (default \eqn{q=0}).
#'
#' @param idx_only bool, optional; \cr
#' if (\eqn{TRUE}), only the index set \code{C.idx} and \code{R.idx} is returned, but not
#' the matrices \code{C} and \code{R}.
#' This is more memory efficient, when dealing with large-scale data.
#'
#' @param rand bool, optional; \cr
#' if (\eqn{TRUE}), a probabilistic strategy is used, otherwise a deterministic algorithm is used.
#'
#'
#' @return \code{rcur} returns a list with class \eqn{id} containing the following components:
#'\describe{
#' \item{C}{ array_like; \cr
#' column subset \eqn{C = A[,C.idx]}; \eqn{(m, k)} dimensional array.
#' }
#'
#' \item{R}{ Array_like. \cr
#' row subset \eqn{R = A[R.idx, ]}; \eqn{(k, n)} dimensional array.
#' }
#'
#' \item{U}{ array_like; \cr
#' connector matrix; \eqn{(k,k)} dimensional array.
#' }
#'
#' \item{C.idx}{ array_like; \cr
#' index set of the \eqn{k} selected columns used to form \eqn{C}.
#' }
#'
#' \item{R.idx}{ array_like; \cr
#' index set of the \eqn{k} selected rows used to form \eqn{R}.
#' }
#'
#'
#' \item{C.scores}{ array_like; \cr
#' scores of the selected columns.
#' }
#'
#' \item{R.scores}{ array_like; \cr
#' scores of the selected rows.
#' }
#'}
#'
#' @references
#' \itemize{
#' \item [1] N. B. Erichson, S. Voronin, S. L. Brunton and J. N. Kutz. 2019.
#' Randomized Matrix Decompositions Using {R}.
#' Journal of Statistical Software, 89(11), 1-48.
#' \doi{10.18637/jss.v089.i11}.
#'
#' \item [2] N. Halko, P. Martinsson, and J. Tropp.
#' "Finding structure with randomness: probabilistic
#' algorithms for constructing approximate matrix
#' decompositions" (2009).
#' (available at arXiv \url{https://arxiv.org/abs/0909.4061}).
#'}
#'
#'
#' @author N. Benjamin Erichson, \email{erichson@berkeley.edu}
#'
#' @seealso \code{\link{rid}}
#'
#'
#'
#' @examples
#' \dontrun{
#' # Load test image
#' data('tiger')
#'
#' # Compute (column) randomized interpolative decompsition
#' # Note that the image needs to be transposed for correct plotting
#' out <- rcur(tiger, k = 150)
#'
#' # Reconstruct image
#' tiger.re <- out$C %*% out$U %*% out$R
#'
#' # Compute relative error
#' print(norm(tiger-tiger.re, 'F') / norm(tiger, 'F'))
#'
#' # Plot approximated image
#' image(tiger.re, col = gray((0:255)/255))
#' }
#' @export
rcur <- function(A, k = NULL, p = 10, q = 0, idx_only = FALSE, rand = TRUE) UseMethod("rcur")
#' @export
rcur.default <- function(A, k = NULL, p = 10, q = 0, idx_only = FALSE, rand = TRUE) {
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Checks
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
if (any(is.na(A))) {
warning("Missing values are omitted: na.omit(A).")
A <- stats::na.omit(A)
}
m <- nrow(A)
n <- ncol(A)
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Init id object
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
rcurObj = list( C = NULL,
R = NULL,
U = NULL,
C.idx = NULL,
R.idx = NULL,
C.scores = NULL,
R.scores = NULL,
rand = rand)
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Set target rank
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
if(is.null(k)) k <- min(n,m)
if(k > min(m,n)) k <- min(m,n)
if(k < 1) stop("Target rank is not valid!")
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#Compute column ID
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
out_cid <- rid(A, k = k, p = p, q = q, mode = 'column', idx_only = TRUE, rand = rand)
# Select column subset
if(idx_only == FALSE) {
rcurObj$C <- matrix(A[, out_cid$idx], nrow = m, ncol = k)
colnames(rcurObj$C) <- colnames(A)[out_cid$idx]
rownames(rcurObj$C) <- rownames(A)
}
rcurObj$C.idx <- out_cid$idx
rcurObj$C.scores <- out_cid$scores
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#Compute row ID of C
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#out_rid <- rid(A[, rcurObj$C.idx], k = k, p = p, q = q, mode = 'row', idx_only = TRUE, rand = FALSE)
out <- qr(H(A[, rcurObj$C.idx]), LAPACK=TRUE)
# Get index set
rcurObj$R.idx <- out$pivot[1:k] # Get row set
# Get R =: S
S <- qr.R( out )
#Q <- qr.Q( out )
# Select row subset
if(idx_only == FALSE) {
rcurObj$R <- matrix(A[rcurObj$R.idx,], nrow = k, ncol = n)
rownames(rcurObj$R) <- rownames(A)[rcurObj$R.idx]
colnames(rcurObj$R) <- colnames(A)
}
rcurObj$R.scores <- abs(diag(S))[1:k]
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#Compute U
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#rcurObj$U = Z %*% MASS::ginv(matrix(A[rcurObj$R.idx, ], nrow = k, ncol = n) )
rcurObj$U = out_cid$Z %*% pinv(matrix(A[rcurObj$R.idx, ], nrow = k, ncol = n))
#colinv = matrix(0, 1 , n)
#colinv[out_cid$pivot] = 1:n
#V = rbind(diag(k), H(out_cid$Z))[colinv,]
# Rt*Ut = V via RRt*Ut = R*V
#RV = rcurObj$R %*% V
#rcurObj$U = H(pinv(rcurObj$R %*% H(rcurObj$R)) %*% RV)
# Rt*Ut = V
#rcurObj$U = H(pinv(H(rcurObj$R)) %*% V)
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Return
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
class(rcurObj) <- "rcur"
return( rcurObj )
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.