R/updates.R

Defines functions update_alpha update_sigma update_tau proj_evals update_psi_mapg

update_alpha <- function(Y, Z, pen = 0, Sigma)
{
  if(pen == 0) return(MASS::ginv(crossprod(Z)) %*% crossprod(Z, Y))
  return(syl_cpp(crossprod(Z), pen * qr.solve(Sigma), -crossprod(Z, Y)))
}

update_sigma <- function(Y, Z, alpha)
{
  return(crossprod(Y - Z %*% alpha) / nrow(Y))
}

update_tau <- function(X, U, d)
{
  p <- ncol(X)
  X <- X %*% U %*% diag(1 / sqrt(1 + d), p)
  tau <- mean(X^2)
  return(tau)
}

proj_evals <- function(d, k){
  p <- length(d)
  # All but k largest eigenvalues vanish
  if(p > k){
    d[(k + 1):p] <- 0
  }
  # All are non-negative
  d[d < 0] <- 0
  return(d)
}

update_psi_mapg <- function(alpha, Sigma, Psi, tau, k, X, Y, step1 = NA, step2 = NA,
                            maxit = 1e3, tol = 1e-6, relative = TRUE,
                            quiet = TRUE)
{
  n <- nrow(Y)
  p <- ncol(X)
  r <- ncol(Y)
  e_S <- eigen(Sigma)
  Omega_sqrt <- e_S$vectors %*% diag(1 / sqrt(abs(e_S$values)), r)

  # Scaled data
  alpha_tilde <- alpha %*% Omega_sqrt
  Y_tilde <- Y %*% Omega_sqrt

  # Pick Lipschitz step-size
  L <- 1 + (norm(X, "2")^2 / n) * (norm(alpha_tilde, "2")^2 + 2 / tau)
  if(is.na(step1)){
    step1 <- 1 / L
  }
  if(is.na(step2)){
    step2 <- 1 / L
  }

  # Initialize
  # Notation from Li and Lin (2015):
  # y_1 = G
  # z_1 = H
  # x_1 = Psi
  # x_0 = Psi_m1
  # v_1 = F
  # t_1 = s
  # t_0 = s_m1
  Psi_m1 <- Psi
  H <- Psi
  s_m1 <- 0
  s <- 1
  e_F <- eigen(Psi, symmetric = TRUE) # Same storage as later
  e_F$values <- proj_evals(e_F$values, k = k)
  obj_old <- obj_fun(U = e_F$vectors, d = e_F$values, alpha = alpha,
                 Sigma = Sigma, tau = tau, Y = Y, X = X)
  for(iter in seq(maxit)){
    if(iter == maxit){
      warning("mAPG reached maxit \n")
    }
    ###########################################################################
    # Get monitor updates
    ###########################################################################
    e_F <- eigen(Psi, symmetric = TRUE)
    # If the folowing fails, step1 has to be too large
    e_F <- eigen(Psi - step1 * grad_obj_psi(U = e_F$vectors, d = e_F$values,
                                            alpha_tilde = alpha_tilde,
                                            tau = tau, Y_tilde = Y_tilde, X = X),
                 symmetric = TRUE)

    e_F$values <- proj_evals(e_F$values, k = k)
    obj_at_F <- obj_fun(U = e_F$vectors, d = e_F$values, alpha = alpha,
                    Sigma = Sigma, tau = tau, Y = Y, X = X)
    ###########################################################################


    ###########################################################################
    # Accelerate to G and get update
    ###########################################################################
    G <- Psi + (s_m1 / s) * (H - Psi) + ((s_m1 - 1) / s) * (Psi - Psi_m1)
    e_G <- eigen(G, symmetric = TRUE)
    if(min(e_G$values) <= -1){  # Gradient cannot be evaluated; restart
      obj_at_H <- Inf
      H <- Psi
    } else{
      e_G <- eigen(G - step2 * grad_obj_psi(U = e_G$vectors, d = e_G$values,
                                            alpha_tilde = alpha_tilde, tau = tau,
                                            Y_tilde = Y_tilde, X = X),
                   symmetric = TRUE)
      # Project proposed steps to feasible set, get eval, and save H
      e_G$values <- proj_evals(e_G$values, k = k)
      obj_at_H <- obj_fun(U = e_G$vectors, d = e_G$values, alpha = alpha,
                      Sigma = Sigma, tau = tau, Y = Y, X = X)
      H <- e_G$vectors %*% tcrossprod(diag(e_G$values, p), e_G$vectors)
    }


    # Save old and set new iterate
    Psi_m1 <- Psi

    if(obj_at_H <= obj_at_F){
      Psi <- H
      obj_new <- obj_at_H
    } else{
      Psi <- e_F$vectors %*% tcrossprod(diag(e_F$values, p), e_F$vectors)
      obj_new <- obj_at_F
    }

    # Check if more iterations needed
    change <- obj_new - obj_old
    if(relative) change <- change / obj_old

    if(!quiet) cat("Change at Psi iteration ", iter, " is: ", change, "\n")

    # Update acceleration step length and objective
    s_m1 <- s
    s <- 0.5 * (sqrt(4 * s^2 + 1) + 1)
    obj_old <- obj_new

    if(abs(change) < tol){
      break
    }
  }
  return(list(Psi = Psi, iter = iter, change = change))
}

# update_psi_mapg_ls <- function(alpha, Sigma, Psi, tau, k, X, Y, delta = 1e-8,
#                                rho = 0.8, maxit = 1e3, tol = 1e-6,
#                                relative = TRUE, quiet = TRUE)
# {
#   n <- nrow(Y)
#   p <- ncol(X)
#   r <- ncol(Y)
#   e_S<- eigen(Sigma)
#   Omega_sqrt <- e_S$vectors %*% diag(1 / sqrt(e_S$values), r)
#
#   # Scaled data
#   alpha_tilde <- alpha %*% Omega_sqrt
#   Y_tilde <- Y %*% Omega_sqrt
#
#   # Notation from Li and Lin (2015)
#   # y_k = G
#   # y_{k - 1} = G_m1
#   # z_{k + 1} = H
#   # x_k = Psi
#   # x_{k - 1} = Psi_m1
#   # v_{k + 1} = V
#   # t_{k + 1} = s
#   # t_k = s_m1
#   # s_k = S
#   # r_k = R
#   # \nabla f(z_k) = grad_at_H
#   # \nabla f(y_{k - 1}) = grad_at_G
#   # \nabla f(v_k) = grad_at_F
#   # \nabla f(x_{k - 1}) = grad_at_Psi
#
#   # Initialize
#   H <- Psi
#   G_m1 <- Psi
#   Psi_m1 <- Psi
#   V <- Psi
#   s <- 1
#   s_m1 <- 0
#
#   e_P <- eigen(Psi, symmetric = TRUE)
#   e_P$values <- proj_evals(e_P$values, k = k)
#   obj_old <- obj_fun(U = e_P$vectors, d = e_P$values, alpha = alpha,
#                  Sigma = Sigma, tau = tau, Y = Y, X = X)
#   grad_at_Psi <- grad_obj_psi(U = e_P$vectors, d = e_P$values,
#                               alpha_tilde = alpha_tilde, tau = tau,
#                               Y_tilde = Y_tilde, X = X)
#
#   grad_at_H <- grad_at_Psi
#   grad_at_V <- grad_at_Psi
#   grad_at_G <- grad_at_Psi
#
#   # Return without iterations if at stationary point
#   if(sum(grad_at_Psi^2) < tol){
#     warning("Algorithm started in stationary point \n")
#     return(list(Psi = Psi, iter = 0, change = 0))
#   }
#
#   # MAIN LOOP
#   for(iter in seq(maxit)){
#     # Accelerate to G
#     G <- Psi + (s_m1 / s) * (H - Psi) + ((s_m1 - 1) / s) * (Psi - Psi_m1)
#
#     # Step-size calculations
#     S <- H - G_m1
#     R <- grad_at_H - grad_at_G
#     step1 <- sum(S * R) / sum(R^2)
#
#     S <- V - Psi_m1
#     R <- grad_at_V - grad_at_Psi
#     step2 <- sum(S * R) / sum(R^2)
#
#     if(iter == 1){
#       step1 <- 1
#       step2 <- 1
#     }
#
#     # First line search
#     # Starting point
#     e_G <- eigen(G, symmetric = TRUE)
#     grad_at_G <- grad_obj_psi(U = e_G$vectors, d = e_G$values,
#                               alpha_tilde = alpha_tilde, tau = tau,
#                               Y_tilde = Y_tilde, X = X)
#     obj_at_G <- obj_fun(U = e_G$vectors, d = e_G$values, alpha = alpha,
#                     Sigma = Sigma, tau = tau, Y = Y, X = X)
#     for(i_iter in seq(maxit)){
#       # Proposed iterate
#       e_H <- eigen(G - step1 * grad_at_G, symmetric = TRUE)
#       e_H$values <- proj_evals(e_H$values, k = k)
#       obj_at_H <- obj_fun(U = e_H$vectors, d = e_H$values, alpha = alpha,
#                       Sigma = Sigma, tau = tau, Y = Y, X = X)
#       H <- e_H$vectors %*% tcrossprod(diag(e_H$values, p), e_H$vectors)
#
#       if(i_iter == maxit) warning("First line search reached maxit \n")
#       if(obj_at_H <= (obj_at_G - delta * sum((H - G)^2))) break
#       step1 <- rho * step1
#     }
#     grad_at_H <- grad_obj_psi(U = e_H$vectors, d = e_H$values,
#                               alpha_tilde = alpha_tilde, tau = tau,
#                               Y_tilde = Y_tilde, X = X)
#
#     # Second line search
#     # Starting point
#     e_P <- eigen(Psi, symmetric = TRUE)
#     grad_at_Psi <- grad_obj_psi(U = e_P$vectors, d = e_P$values,
#                               alpha_tilde = alpha_tilde, tau = tau,
#                               Y_tilde = Y_tilde, X = X)
#     obj_at_Psi <- obj_fun(U = e_P$vectors, d = e_P$values, alpha = alpha,
#                     Sigma = Sigma, tau = tau, Y = Y, X = X)
#
#     for(i_iter in seq(maxit)){
#       # Proposed iterate
#       e_V <- eigen(Psi - step2 * grad_at_Psi, symmetric = TRUE)
#       e_V$values <- proj_evals(e_V$values, k = k)
#       obj_at_V <- obj_fun(U = e_V$vectors, d = e_V$values, alpha = alpha,
#                       Sigma = Sigma, tau = tau, Y = Y, X = X)
#       V <- e_V$vectors %*% tcrossprod(diag(e_V$values, p), e_V$vectors)
#
#       if(i_iter == maxit) warning("Second line search reached maxit \n")
#       if(obj_at_V <= (obj_at_Psi - delta * sum((V - Psi)^2))) break
#       step2 <- rho * step2
#     }
#     grad_at_V <- grad_obj_psi(U = e_V$vectors, d = e_V$values,
#                               alpha_tilde = alpha_tilde, tau = tau,
#                               Y_tilde = Y_tilde, X = X)
#
#
#     # Update iterates
#     Psi_m1 <- Psi
#     G_m1 <- G
#     if(obj_at_H <= obj_at_V){
#       Psi <- H
#       obj_new <- obj_at_H
#     } else{
#       Psi <- V
#       obj_new <- obj_at_V
#     }
#     # Check if more iterations needed
#     change <- obj_new - obj_old
#     if(relative) change <- change / obj_old
#
#     if(!quiet) cat("Change at Psi iteration ", iter, " is: ", change, "\n")
#
#     # Update acceleration step length and objective
#     s_m1 <- s
#     s <- 0.5 * (sqrt(4 * s^2 + 1) + 1)
#     obj_old <- obj_new
#
#     if(iter == maxit){
#       warning("mAPG reached maxit \n")
#     }
#     if(abs(change) < tol){
#       break
#     }
#   }
#   e_P <- eigen(Psi, symmetric = TRUE)
#   grad_at_Psi <- grad_obj_psi(U = e_P$vectors, d = e_P$values,
#                               alpha_tilde = alpha_tilde, tau = tau,
#                               Y_tilde = Y_tilde, X = X)
#   return(list(Psi = Psi, grad = grad_at_Psi, iter = iter,
#               change = change, step1 = step1, step2 = step2))
# }
#
# update_apg <- function(alpha, Sigma, Psi, tau, k, X, Y, maxit = 1e3, tol = 1e-6, quiet = TRUE)
# {
#   n <- nrow(X)
#   p <- ncol(X)
#   r <- ncol(Y)
#   e_S <- eigen(Sigma, symmetric = TRUE)
#   Omega_sqrt <- e_S$vectors %*% diag(1 / sqrt(e_S$values), r)
#   grad_f <- function(x, opts){
#     Psi <- matrix(x, p, p)
#     Psi <- 0.5 * (Psi + t(Psi))
#     e_P <- eigen(Psi, symmetric = TRUE)
#     return(as.vector(grad_obj_psi(e_P$vectors, e_P$values, alpha %*% Omega_sqrt, tau, Y %*% Omega_sqrt, X)))
#   }
#
#   prox <- function(x, t, opts){
#     Psi <- matrix(x, p, p)
#     Psi <- 0.5 * (Psi + t(Psi))
#     e_P <- eigen(Psi, symmetric = TRUE)
#     e_P$values <- proj_evals(e_P$values, k)
#     return(as.vector(e_P$vectors %*% tcrossprod(diag(e_P$values, p), e_P$vectors)))
#   }
#
#   opts <- list("X_INIT" = as.vector(Psi), "EPS" = tol, "MAX_ITERS" = maxit, "QUIET" = quiet)
#
#   update <- apg::apg(grad_f = grad_f, prox_h = prox, dim_x = p^2, opts = opts)
#   return(list(Psi = matrix(update$x, p, p), grad = NA, iter = NA,
#               change = NA, step1 = NA, step2 = NA))
# }
koekvall/mpredcc documentation built on Nov. 4, 2019, 3:54 p.m.