#' Update function for regression coefficients
#'
#' @param Y Matrix (n x r) of responses.
#' @param X Matrix (nr x p) of predictors.
#' @param W Matrix (n x r) of expansion points (~predicted latent variables).
#' @param Sigma Covariance matrix(r x r) of the latent vector.
#' @param psi Vector (r x 1) of conditional variance parameters.
#' @param type Vector (r x 1) indicating response types.
#' @return Updated vector of regression coefficients.
update_beta <- function(Y, X, W, Sigma, psi, type)
{
n <- nrow(Y)
p <- ncol(X)
r <- ncol(Y)
D1 <- t(get_cumulant_diffs(t(W), type, 1)) # t(); C++ code has obs by column
D2 <- t(get_cumulant_diffs(t(W), type, 2))
# Allocate
H <- matrix(0, nrow = p, ncol = p + 1)
for(ii in 1:n){
# Working covariance
C <- D2[ii, ] * Sigma # Works with recycling because drop = TRUE in D2
diag(C) <- diag(C) + psi
s_C <- svd(C)
# Predictor
start_idx <- (ii - 1) * r + 1
Xi <- X[seq(start_idx, start_idx + r - 1), , drop = F]
# Working response
yi <- Y[ii, ] - D1[ii, ] + D2[ii, ] * W[ii, ]
# Sum up individual contributions
H <- H + crossprod(Xi, s_C$v %*% ((1 / s_C$d) * t(s_C$u)) %*%
cbind(D2[ii, ] * Xi, yi))
}
# Use pseudo-inverse
return(solve(H[, 1:p], H[, p + 1]))
}
#' Update function for expansion points (~predicted latent variables)
#'
#' @param Y Matrix (n x r) of responses.
#' @param X Matrix (nr x p) of predictors.
#' @param W Matrix (n x r) of expansion points (~predicted latent variables).
#' @param Beta Vector (p by 1) of regression coefficients.
#' @param Sigma Covariance matrix(r x r) of the latent vector.
#' @param psi Vector (r x 1) of conditional variance parameters.
#' @param type Vector (r x 1) indicating response types.
#' @param pen Regularization parameter lower bouding the eigenvalue of a
#' covariance matrix and multiplying an L2 penalty (see paper).
#' @param tol Tolerance parameter for trust region algorithm
#' @param maxit Maximum number of iterations for trust region algorithm.
#' @param quiet Logical indicating whether to print information while running.
#' @return Matrix of updated expansion points
update_W <- function(Y, X, W, Beta, Sigma, psi, type, pen = 1e-4, tol = 1e-8,
maxit = 100, quiet = T)
{
if(maxit <= 0) return(W) # Allows maxit[4] = 0 to skip W update
n <- nrow(Y)
r <- ncol(Y)
# Set lower limit on eigenvalues for stability
e_S <- eigen(Sigma, symmetric = TRUE)
e_S$values <- pmax(e_S$values, sqrt(.Machine$double.eps))
# Replace by "inverse"
Sigma <- e_S$vectors %*% (e_S$values * t(e_S$vectors))
# Precompute
Xb <- matrix(X %*% Beta, nrow = n, ncol = r, byrow = T)
# This update uses ui = W[ii, ] - Xb[ii, ]
# The ith objective is:
# -Y[ii, ] * u + c(u + Xb[ii, ]) + 0.5 * t(u) %*% solve(Sigma) %*% u +
# 0.5 * pen ||u||^2
for(ii in 1:n){
trust_obj <- function(u){
u <- as.matrix(u, ncol = 1)
d0 <- as.numeric(get_cumulant_diffs(u + Xb[ii, ], type, 0))
d1 <- as.numeric(get_cumulant_diffs(u + Xb[ii, ], type, 1))
d2 <- as.numeric(get_cumulant_diffs(u + Xb[ii, ], type, 2))
# Objective
val <- sum(d0 - Y[ii, ] * u)
val <- as.numeric(val + 0.5 * crossprod(u, Sigma %*% u))
val <- val + 0.5 * pen * sum(u^2)
# Gradient
g <- d1 - Y[ii, ]
g <- as.numeric(g + Sigma %*% u + pen * u)
# Hessian (modified)
H <- Sigma
diag(H) <- diag(H) + d2 + pen
return(list(value = val, gradient = g, hessian = H))
}
opt <- trust::trust(trust_obj, W[ii, ] - Xb[ii, ], rinit = 1, rmax = 100,
iterlim = maxit, fterm = tol, mterm = tol)
W[ii, ] <- opt$argument + Xb[ii, ]
if(!opt$converged){
warning(paste0("w_", ii, " update did not converge \n"))
}
}
return(W)
}
#' Accelerated projected gradient descent update function for Sigma
#'
#' @param R Matrix (n x r) of working residuals.
#' @param D2 Matrix (n x p) second derivatives of cumulant functions evaluated
#' at the expansion points.
#' @param psi Vector (r x 1) of conditional variance parameters.
#' @param Sigma.init Matrix (r x r ) with starting value for Sigma.
#' @param M Matrix (r x r) with restrictions for elements o Sigma
#' (NA = no restriction).
#' @param epsilon Scalar with lower bound on eigenvalues of Sigma.
#' @param tol.dykstra Scalar tolerance parameter for projection algorithm,
#' @param tol.ipiano Scalar tolerance for descent algorithm.
#' @param max.iter.dykstra Integer maximum number of iterations for projection
#' algorithm.
#' @param max.iter.ipiano Integer maximum number of iterations for descent
#' algorithm.
#' @param quiet Logical indicating whether to print information while running.
#' @return Matrix (r x r) with update of Sigma
update_Sigma_pgd <- function(R, D2, psi, Sigma.init, M, epsilon = 0,
tol.dykstra = 1e-12, tol.ipiano = 1e-10,
max.iter.dykstra = 1e3, max.iter.ipiano = 1e3,
quiet = TRUE)
{
if(max.iter.ipiano == 0) return(Sigma.init)
Sigmakm1 <- Sigma.init
Sigma <- Sigma.init
L0 <- 2
delta <- 1e-4
c2 <- 1e-6
# obj.prev <- evalObj(H, A, B, Sigma)
obj.prev <- obj_sigma_rcpp(Sigma= Sigma,
R_T = t(R),
D2_T = t(D2),
psi = psi,
use_idx = 1:nrow(R),
order = 0)$value
obj.orig <- obj.prev
for(kk in 1:max.iter.ipiano){
#tempGrad <- getGrad(H, A, B, Sigma)
tempGrad <- obj_sigma_rcpp(Sigma = Sigma,
R_T = t(R),
D2_T = t(D2),
psi = psi,
use_idx = 1:nrow(R),
order = 1)$gradient
Ln <- L0
linesearch <- TRUE
# KO: is this a good idea?
if(max(abs(tempGrad)) <= tol.ipiano) linesearch <- FALSE
while(linesearch){
# -- interial step projected grad ---
# b <- (delta + Ln/2)/(c2 + Ln/2)
# Bn <- (b-1)/(b-.5)
# alpha <- 2*(1 - Bn)/(2*c2 + Ln)
Bn <- .95
alpha <- 1.9*(1 - Bn)/Ln
temp <- Sigma - alpha*tempGrad + Bn * (Sigma - Sigmakm1)
#Sigma.temp <- CorrelationProjection(temp, M = M, epsilon= epsilon)
Sigma.temp <- project_rcpp(X = temp,
restr_idx = which(c(!is.na(M))),
restr = as.numeric(M[!is.na(M)]),
eps = epsilon,
tol = tol.dykstra,
maxit = max.iter.dykstra)
# obj.temp <- evalObj(H, A, B, Sigma.temp)
obj.temp <- obj_sigma_rcpp(Sigma = Sigma.temp,
R_T = t(R),
D2_T = t(D2),
psi = psi,
use_idx = 1:nrow(R),
order = 0)$value
if(!(obj.temp >= obj.prev + sum(tempGrad * t(Sigma.temp - Sigma)) +
(Ln / 2)*sum((Sigma.temp - Sigma)^2))){
Sigmakm1 <- Sigma
Sigma <- Sigma.temp
linesearch <- FALSE
} else {
Ln <- Ln * 5
if(Ln >= sqrt(.Machine$double.xmax)) linesearch <- FALSE
}
}
if(abs(obj.temp - obj.prev) < tol.ipiano*abs(obj.orig)){
break
}
obj.prev <- obj.temp
if(!quiet){
cat(obj.prev, "\n")
}
}
return(Sigma)
}
#' Trust region update function for Sigma
#'
#' @param Sigma_start Matrix (r x r ) with starting value for Sigma.
#' @param R Matrix (n x r) of working residuals.
#' @param D2 Matrix (n x p) second derivatives of cumulant functions evaluated
#' at the expansion points.
#' @param psi Vector (r x 1) of conditional variance parameters.
#' @param M Matrix (r x r) with restrictions for elements o Sigma
#' (NA = no restriction).
#' @param use_idx Vector with indexes for observations to use in update.
#' @param tol Scalar tolerance parameter for trust region algorithm.
#' @param maxit Integer maximum number of iterations for trust region algorithm.
#' @return Matrix (r x r) with update of Sigma
update_Sigma_trust <- function(Sigma_start, R, D2, psi, M, use_idx,
tol = sqrt(.Machine$double.eps),
maxit = 100)
{
if(maxit == 0) return(Sigma_start)
n <- nrow(R)
r <- ncol(R)
m <- matrixcalc::vech(M)
opt_idx <- lower.tri(M, diag = T) & is.na(M)
theta0 <- as.numeric(Sigma_start[opt_idx])
if(r > 1){
D <- matrixcalc::D.matrix(r) # This is very inefficient
} else{
D = matrix(1, 1, 1)
}
obj <- function(theta){
Sigma <- M
Sigma[opt_idx] <- theta
Sigma[upper.tri(Sigma)] <- t(Sigma)[upper.tri(Sigma)]
all_outs <- obj_sigma_rcpp(Sigma = Sigma, R_T = t(R), D2_T = t(D2),
psi = psi, use_idx = 1:n, order = 2)
g <- as.numeric(crossprod(D, as.numeric(all_outs$gradient)))[is.na(m)]
H <- crossprod(D, all_outs$hessian %*% D)[is.na(m), is.na(m)]
H <- as.matrix(H) # for when r = 1
return(list(value = all_outs$value, gradient = g, hessian = H))
}
opt <- trust::trust(objfun = obj, parinit = theta0, rinit = 1, rmax = 100,
iterlim = maxit, fterm = tol, mterm = tol)
if(!opt$converged){
warning("Sigma update did not converge \n")
}
Sigma <- Sigma_start
Sigma[opt_idx] <- opt$argument
Sigma[!is.na(M)] <- M[!is.na(M)]
Sigma[upper.tri(Sigma)] <- t(Sigma)[upper.tri(Sigma)]
return(Sigma)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.