# R/morpca.R In dfleis/morpca: Manifold Optimization for Robust PCA

#' Manifold Optimization for Robust PCA
#'
#' A robust PCA algorithm designed to extract an underlying low-rank matrix
#' with grossly corrupted observations.
#'
#' @details Implementation of the robust PCA algorithms outlined in
#' Zhang, T. and Yang, Y. (forthcoming) to recover the underlying low rank
#' matrix \eqn{L^*} given an input matrix \eqn{Y} assumed to be of the form
#' \deqn{Y = L^* + S^*,} where \eqn{S^*} is a sparse matrix representing the
#' noise and \eqn{L^*} representing the signal. This implementation offers
#' both the projective and orthographic retractions as methods to map from
#' the input matrix's tangent space back to the manifold of low rank matrices.
#' Robustness is achieved via a hard thresholding function which sets
#' entry \eqn{(i,j)} to 0 if it exceeds the \eqn{\gamma}-th percentile of the
#' \eqn{i}-th row and \eqn{j}-th column (see Zhang, T. and Yang, Y. (forthcoming)
#' for details).
#'
#' To recover \eqn{L^*} we solve the following optimization problem:
#' \deqn{argmin_{rank(L) = r} f(L),}
#' where
#' \deqn{f(L) = 1/2 || F(L - Y) ||^2_F,}
#' such that \eqn{F} is the hard-thresholding function described above.
#'
#' @param Y Input matrix composed of the sum of matrices \eqn{L^*}
#'          (signal) and \eqn{S^*} (noise).
#' @param r Rank of the underlying matrix \eqn{L^*} and its estimate.
#' @param gamma Value between 0 and 1 corresponding to percentile
#'              for the hard thresholding procedure.
#' @param sparsity TO DO...
#' @param retraction String specifying which retraction technique
#'                   should be applied. Currently implemented are the
#'                   \code{"projective"} and \code{"orthographic"}
#'                   retractions.
#' @param stepsize Positive nonzero number corresponding to the step size
#'                 \eqn{\eta} in the gradient descent algorithm.
#' @param maxiter Positive nonzero integer specifying the maximum number
#'                of steps to compute for the gradient descent algorithm.
#' @param stepsout Boolean value. If \code{stepsout = T} then the function
#'                 returns the output low rank matrix estimate and corresponding
#' @param verbose Boolean value. If \code{verbose = T} then the gradient
#'                descent algorithm reports the current step number and
#'                value of the objective function at each iteration.
#'
#' @return Returns an object of class \code{morpca} with elements:
#'
#' \item{Y}{The original observations used as the input matrix for which we
#' seek an underlying low rank matrix.}
#' \item{rank}{Rank of the estimated target underlying matrix.}
#' \item{gamma}{Thresholding percentile.}
#' \item{sparsity}{TO DO...}
#' \item{retraction}{User specified retraction.}
#' \item{stepsize}{User defined step size for the gradient descent process.}
#' \item{maxiter}{Maximum number of iterations before the gradient descent process terminates.}
#' \item{solution}{Estimated underlying low rank matrix. List of matrices if \code{stepsout = T}.}
#' \item{gradient}{Corresponding gradient matrix of the objective function. List of matrices if \code{stepsout = T}.}
#' \item{objective}{Corresponding value of the objective function (Frobenius norm of the gradient matrix).}
#'
#' @export
morpca <- function(Y = NULL, r = NULL, gamma = NULL, sparsity = NULL,
retraction = c("projective", "orthographic"),
stepsize  = NULL,
maxiter   = 100,
stepsout  = F,
verbose   = F) {
# TO DO:
#   * Set escape condition under a sufficient tolerance (i.e. 10^-10 or something)
#   * Handle partial observations (NA values)
#   * Handle missing and invalid inputs
#   * Rename input 'gamma' to 'threshold' NOTE: WE MUST RENAME THE CORRESPONDING
#     threshold() FUNCTION!

if (is.null(Y)) {
# return warning/set default? return error?
}
if (is.null(r)) {
# return warning/set default? return error?
}
if (is.null(gamma)) {
# return warning/set default? return error?
}
if (is.null(sparsity)) {
# return warning/set default? return error?
}
if (is.null(stepsize)) {
# return warning/set default? return error?
# check if stepsize <= 0
}
# convergence tolerance
#if (is.null(tol)) { # (not actually implemented yet)
#  # there's probably some theoretical guideline in the paper
# # look through it to set something up...
#  tol <- ncol(Y) * nrow(Y) * .Machine$double.eps #} # set up data structures L_list <- gradient_list <- vector(mode = 'list', length = maxiter + 1) objective <- vector(mode = 'numeric', length = maxiter + 1) n1 <- nrow(Y); n2 <- ncol(Y) ################## ### INITIALIZE ### ################## ### OLD: # initialize best rank-r approximation of Y # (? check this since the paper says to intialize with the best rank-r # approx. of f(Y) which appears to be a typo ?) # #L_list[] <- rank_r_approx_cpp(Y, r) #gradient_list[] <- percentile_threshold(L_list[] - Y, gamma) ### NEW: (not sure what this starting point is/what is its theoretical # justification?) L <- threshold(Y, gamma, sparsity) SVD <- svd(tcrossprod(L)) U <- SVD$u

L_list[] <- U[,1:r] %*% crossprod(U[,1:r], L)

if (verbose) {
print(paste0("k = 0. Objective value = ", sprintf("%.5g", objective, 5)))
}

#========================#
#========================#
out <- vector(mode = "list", length = 2)

for (k in 1:maxiter) {

if (retraction == "projective" |
retraction == "proj" |
retraction == "p") {

out <- projective_retraction(L = L_list[[k]], Y = Y, r = r, gamma = gamma,
eta = stepsize, sparsity = sparsity)

} else if (retraction == "orthographic" |
retraction == "orth" |
retraction == "o") {

out <- orthographic_retraction(L = L_list[[k]], Y = Y, r = r, gamma = gamma,
eta = stepsize, sparsity = sparsity)

} else { # invalid 'retraction' input specified
stop("Error in morpca(): Argument 'retraction' must be specified as either 'projective' or 'orthographic'.")
}

L_list[[k + 1]]        <- out$L gradient_list[[k + 1]] <- out$gradient
objective[k + 1]       <- norm(gradient_list[[k + 1]], "f")

if (verbose) {
print(
paste0("k = ", k, ". Objective value = ",
sprintf("%.5g", objective[k + 1], 5)))
}

if (!stepsout) { # clear memory
L_list[[k]]        <- NA
}
}

# remove erased elements in output if stepsout = T
L_list        <- L_list[!is.na(L_list)]

#================#
# RETURN OUTPTUS #
#================#
out <- list("Y"          = Y,
"rank"       = r,
"gamma"      = gamma,
"sparsity"   = sparsity,
"retraction" = retraction,
"stepsize"   = stepsize,
"maxiter"    = maxiter,
"solution"   = L_list[!sapply(L_list, is.null)],
out$solution <- out$solution[[length(out$solution)]] out$gradient <- out$gradient[[length(out$gradient)]]