#' Function for the PX-CAVI algorithm using the multivariate normal slab
#'
#'
#' This function employs the PX-CAVI algorithm proposed in Ning (2020).
#' The \eqn{g} in the slab density of the spike and slab prior is chosen to be the multivariate normal distribution, i.e.,
#' \eqn{N(0, \sigma^2/\lambda_1 I_r)}.
#' Details of the model and the prior can be found in the Details section in the description of the `VBsparsePCA()` function.
#'
#'
#'@param x Data an \eqn{n*p} matrix.
#'@param r Rank.
#'@param lambda Tuning parameter for the density \eqn{g}.
#'@param max.iter The maximum number of iterations for running the algorithm.
#'@param eps The convergence threshold; the default is \eqn{10^{-4}}.
#'@param jointly.row.sparse The default is true, which means that the jointly row sparsity assumption is used; one could not use this assumptio by changing it to false.
#'@param sig2.true The default is false, \eqn{\sigma^2} will be estimated; if sig2 is known and its value is given, then \eqn{\sigma^2} will not be estimated.
#'@param threshold The threshold to determine whether \eqn{\gamma_j} is 0 or 1; the default value is 0.5.
#'@param theta.int The initial value of theta mean; if not provided, the algorithm will estimate it using PCA.
#'@param theta.var.int The initial value of theta.var; if not provided, the algorithm will set it to be 1e-3*diag(r).
#'@param kappa.para1 The value of \eqn{\alpha_1} of \eqn{\pi(\kappa)}; default is 1.
#'@param kappa.para2 The value of \eqn{\alpha_2} of \eqn{\pi(\kappa)}; default is \eqn{p+1}.
#'@param sigma.a The value of \eqn{\sigma_a} of \eqn{\pi(\sigma^2)}; default is 1.
#'@param sigma.b The value of \eqn{\sigma_b} of \eqn{\pi(\sigma^2)}; default is 2.
#'
#'
#'@return \item{iter}{The number of iterations to reach convergence.}
#' \item{selection}{A vector (if \eqn{r = 1} or with the jointly row-sparsity assumption) or a matrix (if otherwise) containing the estimated value for \eqn{\boldsymbol \gamma}.}
#' \item{theta.mean}{The loadings matrix.}
#' \item{theta.var}{The covariance of each non-zero rows in the loadings matrix.}
#' \item{sig2}{Variance of the noise.}
#' \item{obj.fn}{A vector contains the value of the objective function of each iteration. It can be used to check whether the algorithm converges}
#'
#'@examples
#' #In this example, the first 20 rows in the loadings matrix are nonzero, the rank is 1
#' set.seed(2021)
#' library(MASS)
#' library(pracma)
#' n <- 200
#' p <- 1000
#' s <- 20
#' r <- 1
#' sig2 <- 0.1
#' # generate eigenvectors
#' U.s <- randortho(s, type = c("orthonormal"))
#' U <- rep(0, p)
#' U[1:s] <- as.vector(U.s[, 1:r])
#' s.star <- rep(0, p)
#' s.star[1:s] <- 1
#' eigenvalue <- seq(20, 10, length.out = r)
#' # generate Sigma
#' theta.true <- U * sqrt(eigenvalue)
#' Sigma <- tcrossprod(theta.true) + sig2*diag(p)
#' # generate n*p dataset
#' X <- t(mvrnorm(n, mu = rep(0, p), Sigma = Sigma))
#' result <- spca.cavi.mvn(x = X, r = 1)
#' loadings <- result$theta.mean
#' @export
spca.cavi.mvn <- function(
x, r, lambda = 1, max.iter = 100, eps = 1e-4,
jointly.row.sparse = TRUE, sig2.true = NA,
threshold = 0.5, theta.int = NA, theta.var.int = NA,
kappa.para1 = NA, kappa.para2 = NA, sigma.a = NA,
sigma.b = NA
) {
#------ Dimension of input matrices ------#
n <- dim(x)[1]
p <- dim(x)[2]
# initialize theta.hat and theta.var
svd.res <- svd(x)
if (is.na(theta.int) == TRUE) {
if (r == 1) {
theta.mean <- as.matrix((t(svd.res$u) %*% x)[1, ]) / sqrt(n-1)
} else {
theta.mean <- t((t(svd.res$u) %*% x)[1:r, ]) / sqrt(n-1)
}
} else {
theta.mean <- theta.int
}
if (is.na(theta.var.int) == TRUE) {
theta.var <- 1e-3*diag(r)
} else {
theta.var <- theta.var.int
}
# initialize z
if (jointly.row.sparse == F) {
if (r == 1) {
z <- rep(1,p)
} else {
z <- matrix(1, p, r)
}
} else {
z <- rep(1, p)
}
# initialize sigma - choose sig2 to be the second smallest eigenvalue of the covariance matrix
if (is.na(sig2.true) == TRUE) {
sig2 <- (svd.res$d[length(svd.res$d)-1])^2 / (n-1)
} else {
sig2 <- sig2.true
}
# choose hyperparameter for priors
# para1 for the prior of theta
if (is.na(kappa.para1) == TRUE) {
kappa.para1 <- 1
} else {
kappa.para1 <- kappa.para1
}
# para2 for the prior of theta
if (is.na(kappa.para2) == TRUE) {
kappa.para2 <- p+1
} else {
kappa.para2 <- kappa.para2
}
# para1 for the prior of sigma
if (is.na(sigma.a) == TRUE) {
sigma.a <- 1
} else {
sigma.a <- sigma.a
}
# para2 for the prior of sigma
if (is.na(sigma.b) == TRUE) {
sigma.b <- 2
} else {
sigma.b <- sigma.b
}
# create empty matrix to collect results
theta.res <- array(NA, dim = c(p, r, max.iter))
theta.var.res <- rep(NA, max.iter)
selection.res <- array(NA, dim = c(p,r, max.iter))
obj.fn.res <- matrix(NA, max.iter)
sig2.res <- rep(NA, max.iter)
# starting the loop
for (iter in 1:max.iter) {
########################## E-step: update q(w) #############################
# sum.theta.var <- Reduce("+", theta.var)
if (iter == 1) {
theta.mean.old <- theta.mean
z.hat.old <- z.theta <- z
}
z.theta.mean <- theta.mean * z.theta # obtain theta.mean * z
if (jointly.row.sparse == TRUE) {
var.w <- solve(crossprod(z.theta.mean)/sig2 + sum(z.theta)*(theta.var) + diag(r)) # var of w
} else {
z.theta.var <- diag(theta.var) * z.theta # obtain z * theta.var
if (r == 1) {
var.w <- solve(crossprod(z.theta.mean)/sig2 + sum(z.theta.var) + diag(r))
} else {
var.w <- solve(crossprod(z.theta.mean)/sig2 + diag(colSums(z.theta.var)) + diag(r)) # var of w
}
}
# mean of w
mean.w <- sapply(1:n, FUN = function(i) {
x[i,] %*% ((z.theta.mean) %*% var.w) / sig2
})
# MSE of w
MSE.w.i <- lapply(1:n, FUN = function(i) {
if (r == 1) {
(mean.w[i]^2 + var.w)
} else {
(tcrossprod(mean.w[, i]) + var.w)
}
})
MSE.w <- Reduce("+", MSE.w.i) # sum of n second moments of w_i
########################## update q(beta) #############################
## Mean of beta
if (r == 1) {
beta.term1 <- (MSE.w + lambda)
beta.term2 <- t(mean.w %*% x)
beta.mean <- as.matrix(c(beta.term2) / c(beta.term1))
beta.var <- 1/beta.term1
} else {
beta.term1 <- (MSE.w + lambda * diag(r))
beta.term2 <- t(mean.w %*% x)
beta.var <- solve(beta.term1)
beta.mean <- beta.term2 %*% beta.var
}
########################### update z ################################
h.hat <- sapply(1:p, FUN = function(j) {
if (r == 1) {
term1 <- - 1 /(sig2) * sum((mean.w %*% x[, j]) * beta.mean[j, ])
term2 <- 1 / (2*sig2) * beta.mean[j, ]^2 * c(MSE.w)
term3.1 <- sig2 * beta.var * c(MSE.w)
term3 <- term3.1 / (2*sig2)
term4 <- lambda / 2 * (sig2 * beta.var + (beta.mean[j])^2)
term5 <- - r * log (lambda) / 2 - log(beta.var)/2 -
1/2 + log(kappa.para2/kappa.para1)
} else {
if (jointly.row.sparse == TRUE) {
term1 <- - 1/(sig2) * sum((mean.w %*% x[, j]) * beta.mean[j, ])
term2 <- c(1/(2*sig2) * beta.mean[j, ] %*% MSE.w %*% beta.mean[j, ] )
term3.1 <- sig2 * beta.var %*% MSE.w
term3 <- sum(diag(term3.1)) / (2*sig2)
term4 <- lambda/2 * (sig2 * sum(diag(beta.var)) + crossprod(beta.mean[j, ]))
term5 <- - r * log (lambda) / 2 - log(det(beta.var))/2 -
1/2 + log(kappa.para2/kappa.para1)
} else {
# if jointly.row.sparse = False, then each term is a r*1 vector
term1 <- - 1/(sig2) * (mean.w %*% x[, j]) * beta.mean[j, ]
term2.1 <- tcrossprod(beta.mean[j, ]) %*% MSE.w
term2 <- diag( term2.1 )/(2*sig2)
term3.1 <- sig2 * beta.var %*% MSE.w
term3 <- diag(term3.1) / (2*sig2)
term4 <- lambda/2 * (sig2 * diag(beta.var) + beta.mean[j, ]^2)
term5 <- - log(lambda)/2 - log(diag(beta.var))/2 - 1/2 +
log(kappa.para2/kappa.para1)
}
}
sum <- -(c(term1) + c(term2) + c(term3) + c(term4) + c(term5))
})
if (jointly.row.sparse == F) {
if (r == 1) {
z.hat <- pracma::sigmoid(h.hat)
} else {
z.hat <- pracma::sigmoid(t(h.hat))
}
} else {
z.hat <- pracma::sigmoid(h.hat)
}
z <- ifelse(z.hat < threshold, 0, 1) # the estimated z for beta
z.beta.mean <- beta.mean * z
######################## update sig2 ###########################
if (is.na(sig2.true) == TRUE) {
# evaluate the sum for each j
sigma.foreach.j <- sapply(1:p, FUN = function(j) {
sum(x[, j]^2) - 2*sum(mean.w %*% x[, j] * z.beta.mean[j, ]) +
c(z.beta.mean[j, ] %*% MSE.w %*% z.beta.mean[j, ]) +
lambda*sum(z.beta.mean[j, ]^2)
})
sig2 <- (sum(sigma.foreach.j) + 2*sigma.b) / (p*n + 2*sigma.a+2)
}
#################### Rotate beta back to theta ###################
# first rotation obtain D
D <- MSE.w/n
chol.D <- t(chol(D))
beta.mean.tilde <- beta.mean %*% chol.D
theta.var <- chol.D %*% beta.var %*% t(chol.D) # theta.var = beta.var
# second rotation obtain A
beta.mean.tilde.svd <- svd(beta.mean.tilde)
A <- beta.mean.tilde.svd$u # obtain A
if (r == 1) {
theta.mean <- A %*% beta.mean.tilde.svd$d
} else {
theta.mean <- A %*% diag(beta.mean.tilde.svd$d)
}
# when r > 1 and jointly.row.sparsity == F, then the support of theta
# can be different from it of beta; an extra step is needed to obtain the
# support of theta
####################### obtain sparsity for theta #######################
if (jointly.row.sparse == F && r > 1) {
h.theta.hat <- sapply(1:p, FUN = function(j) {
# if not jointly.row.sparse, then each term is an r*1 vector
# one need to check if the sign of theta.mean is the same as it of beta.mean
if (sign(theta.mean[j, 1]) == sign(beta.mean[j, 1])) {
term1 <- - 1/(sig2) * (mean.w %*% x[, j]) * theta.mean[j, ]
} else {
term1 <- 1/(sig2) * (mean.w %*% x[, j]) * theta.mean[j, ]
}
term2.1 <- tcrossprod(theta.mean[j, ]) %*% MSE.w
term2 <- diag(term2.1)/(2*sig2)
term3.1 <- sig2 * theta.var %*% MSE.w
term3 <- diag(term3.1) / (2*sig2)
term4 <- lambda/2 * diag(sig2 * theta.var + tcrossprod(theta.mean[j, ]))
term5 <- - log(lambda)/2 - log(diag(theta.var))/2 - 1/2 +
log(kappa.para2/kappa.para1)
sum <- -(c(term1) + c(term2) + c(term3) + c(term4) + c(term5))
})
z.theta.hat <- pracma::sigmoid(t(h.theta.hat))
z.theta <- ifelse(z.theta.hat < threshold, 0, 1)
} else {
z.theta <- z
}
################# obtain the objective function #####################
obj.fn1 <- sum((tcrossprod(theta.mean) - tcrossprod(theta.mean.old))^2)
obj.fn2 <- sum(abs(z.hat - z.hat.old))
obj.fn <- max(obj.fn1, obj.fn2)
######################### collect results ############################
if (jointly.row.sparse == TRUE) {
selection.res[, 1,iter] <- z.theta
} else {
selection.res[, , iter] <- z.theta
}
theta.res[, , iter] <- theta.mean * z.theta
obj.fn.res[iter] <- obj.fn
sig2.res[iter] <- sig2
if (obj.fn < eps) {
break
} else {
theta.mean.old <- theta.mean
z.hat.old <- z.hat
}
}
if (jointly.row.sparse == TRUE) {
return(list(iter = iter, selection = selection.res[,1,iter],
theta.mean = theta.res[,,iter], theta.var = theta.var,
sig2 = sig2, obj.fn = obj.fn.res[1:iter]))
} else {
return(list(iter = iter, selection = selection.res[,,iter],
theta.mean = theta.res[,,iter], theta.var = theta.var,
sig2 = sig2, obj.fn = obj.fn.res[1:iter]))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.