Nothing
#' Function for the PX-CAVI algorithm using the Laplace slab
#'
#' This function employs the PX-CAVI algorithm proposed in Ning (2020).
#' The \eqn{g} in the slab density of the spike and slab prior is chosen to be the Laplace density, i.e.,
#' \eqn{N(0, \sigma^2/\lambda_1 I_r)}.
#' Details of the model and the prior can be found in the Details section in the description of the `VBsparsePCA()` function.
#' This function is not capable of handling the case when r > 1. In that case, we recommend to use the multivariate distribution instead.
#'
#'
#'
#'@param x Data an \eqn{n*p} matrix.
#'@param r Rank.
#'@param lambda Tuning parameter for the density \eqn{g}.
#'@param max.iter The maximum number of iterations for running the algorithm.
#'@param eps The convergence threshold; the default is \eqn{10^{-4}}.
#'@param sig2.true The default is false, \eqn{\sigma^2} will be estimated; if sig2 is known and its value is given, then \eqn{\sigma^2} will not be estimated.
#'@param threshold The threshold to determine whether \eqn{\gamma_j} is 0 or 1; the default value is 0.5.
#'@param theta.int The initial value of theta mean; if not provided, the algorithm will estimate it using PCA.
#'@param theta.var.int The initial value of theta.var; if not provided, the algorithm will set it to be 1e-3*diag(r).
#'@param kappa.para1 The value of \eqn{\alpha_1} of \eqn{\pi(\kappa)}; default is 1.
#'@param kappa.para2 The value of \eqn{\alpha_2} of \eqn{\pi(\kappa)}; default is \eqn{p+1}.
#'@param sigma.a The value of \eqn{\sigma_a} of \eqn{\pi(\sigma^2)}; default is 1.
#'@param sigma.b The value of \eqn{\sigma_b} of \eqn{\pi(\sigma^2)}; default is 2.
#'
#'
#'@return \item{iter}{The number of iterations to reach convergence.}
#' \item{selection}{A vector (if \eqn{r = 1} or with the jointly row-sparsity assumption) or a matrix (if otherwise) containing the estimated value for \eqn{\boldsymbol \gamma}.}
#' \item{theta.mean}{The loadings matrix.}
#' \item{theta.var}{The covariance of each non-zero rows in the loadings matrix.}
#' \item{sig2}{Variance of the noise.}
#' \item{obj.fn}{A vector contains the value of the objective function of each iteration. It can be used to check whether the algorithm converges}
#'
#' @examples
#' #In this example, the first 20 rows in the loadings matrix are nonzero, the rank is 1
#' set.seed(2021)
#' library(MASS)
#' library(pracma)
#' n <- 200
#' p <- 1000
#' s <- 20
#' r <- 1
#' sig2 <- 0.1
#' # generate eigenvectors
#' U.s <- randortho(s, type = c("orthonormal"))
#' U <- rep(0, p)
#' U[1:s] <- as.vector(U.s[, 1:r])
#' s.star <- rep(0, p)
#' s.star[1:s] <- 1
#' eigenvalue <- seq(20, 10, length.out = r)
#' # generate Sigma
#' theta.true <- U * sqrt(eigenvalue)
#' Sigma <- tcrossprod(theta.true) + sig2*diag(p)
#' # generate n*p dataset
#' X <- t(mvrnorm(n, mu = rep(0, p), Sigma = Sigma))
#' result <- spca.cavi.Laplace(x = X, r = 1)
#' loadings <- result$theta.mean
#' @export
#'
spca.cavi.Laplace <- function(
x, r = 1, lambda = 1, max.iter = 100, eps = 1e-3,
sig2.true = NA, threshold = 0.5,
theta.int = NA, theta.var.int = NA,
kappa.para1 = NA, kappa.para2 = NA, sigma.a = NA,
sigma.b = NA
) {
#------ Dimension of input matrices ------#
n <- dim(x)[1]
p <- dim(x)[2]
# initialize theta.hat and theta.var
svd.res <- svd(x)
if (is.na(theta.int) == TRUE) {
theta.mean <- as.matrix((t(svd.res$u) %*% x)[1, ]) / sqrt(n-1)
} else {
theta.mean <- theta.int
}
if (is.na(theta.var.int) == TRUE) {
theta.var <- lapply(1:p, FUN =function(j){1e-3*diag(r)})
} else {
theta.var <- lapply(1:p, FUN =function(j){theta.var.int})
}
# initialize z
z <- rep(1,p)
# initialize sigma - choose sig2 to be the second smallest eigenvalue of the covariance matrix
if (is.na(sig2.true) == TRUE) {
sig2 <- (svd.res$d[length(svd.res$d)-1])^2 / (n-1)
} else {
sig2 <- sig2.true
}
# choose hyperparameter for priors
# para1 for the prior of theta
if (is.na(kappa.para1) == TRUE) {
kappa.para1 <- 1
} else {
kappa.para1 <- kappa.para1
}
# para2 for the prior of theta
if (is.na(kappa.para2) == TRUE) {
kappa.para2 <- p+1
} else {
kappa.para2 <- kappa.para2
}
# para1 for the prior of sigma
if (is.na(sigma.a) == TRUE) {
sigma.a <- 1
} else {
sigma.a <- sigma.a
}
# para2 for the prior of sigma
if (is.na(sigma.b) == TRUE) {
sigma.b <- 2
} else {
sigma.b <- sigma.b
}
# create empty matrix to collect results
theta.res <- array(NA, dim = c(p, r, max.iter))
theta.var.res <- rep(NA, max.iter)
selection.res <- array(NA, dim = c(p,r, max.iter))
obj.fn.res <- matrix(NA, max.iter)
sig2.res <- rep(NA, max.iter)
for (iter in 1:max.iter) {
########################## E-step: update q(w) #############################
# sum.theta.var <- Reduce("+", theta.var)
if (iter == 1) {
theta.mean.old <- theta.mean
z.hat.old <- z.theta <- z
beta.mean <- theta.mean
beta.var <- theta.var
}
z.theta.mean <- theta.mean*z.theta # obtain theta.mean * z
z.theta.var.sum <- sum(sapply(1:p, function(j) {
z.theta[j] * theta.var[[j]]
}))
var.w <- solve(crossprod(z.theta.mean)/sig2 + z.theta.var.sum + diag(r)) # var of w
# mean of w
mean.w <- sapply(1:n, FUN = function(i) {
x[i,] %*% ((z.theta.mean) %*% var.w) / sig2
})
# MSE of w
MSE.w.i <- lapply(1:n, FUN = function(i) {
if (r == 1) {
(mean.w[i]^2 + var.w)
} else {
(tcrossprod(mean.w[, i]) + var.w)
}
})
MSE.w <- Reduce("+", MSE.w.i) # sum of n second moments of w_i
########################## update beta #############################
fn.beta.mean <- function(x, mean.w, var.w, beta.mean, beta.var, lambda, sig2) {
mean.w <- t(mean.w)
term1 <- - 1/sig2 * x %*% (t(mean.w) %*% beta.mean)
term2 <- 1/(2*sig2) * beta.mean %*% MSE.w %*% beta.mean
term3 <- lambda * sum(sapply(1:r, FUN = function(k) {
foldednorm.mean(beta.mean[k], sig2 * beta.var[k,k])
}))
obj <- c(term1) + c(term2) + c(term3)
return(obj)
}
# calculate beta.mean
beta.mean <- sapply(1:p, FUN = function(j) {
optimize(fn.beta.mean, c(-1e2, 1e2), x = x[, j], mean.w = mean.w, var.w = var.w,
beta.var = beta.var[[j]], lambda = lambda, sig2 = sig2)$minimum
})
beta.mean <- matrix(beta.mean, p, 1)
fn.beta.var <- function(mean.w, var.w, beta.mean, beta.var.vec, lambda, sig2, r) {
beta.var <- beta.var.vec
term1.1 <- c(beta.var) * MSE.w
term1 <- 1/2 * term1.1
term2 <- 1/2 * log(c(beta.var)+1e-6)
term3 <- lambda * foldednorm.mean(c(beta.mean), sig2 * c(beta.var))
obj <- c(term1) + c(term2) + c(term3)
return(obj)
}
beta.var <- lapply(1:p, function(j) {
value <- optimize(fn.beta.var, lower = 0, upper = 0.5, mean.w = mean.w, var.w = var.w,
beta.mean = beta.mean[j, ], lambda = lambda, sig2 = sig2,r=r)$minimum
mtx <- matrix(value, r, r)
})
#### Update z ########
h.hat <- sapply(1:p, FUN = function(j) {
term1 <- - 1/(sig2) * sum((mean.w %*% x[, j]) * beta.mean[j, ])
term2 <- 1/(2*sig2) * beta.mean[j, ]^2 * MSE.w
term3.1 <- sig2 * beta.var[[j]] * MSE.w
term3 <- term3.1 / (2*sig2)
term4 <- lambda * foldednorm.mean(beta.mean[j, ], sig2 * beta.var[[j]])
term5 <- r * log(sqrt(2/(pi * sig2 * lambda^2)))/2 - log(beta.var[[j]])/2 -
1/2 + log(kappa.para2/kappa.para1)
sum <- -(c(term1) + c(term2) + c(term3) + c(term4) + c(term5))
})
z.hat <- pracma::sigmoid(h.hat)
z <- ifelse(z.hat < threshold, 0, 1) # the estimated z for beta
z.beta.mean <- beta.mean * z
######################## Estimate sig2 ###########################
if (is.na(sig2.true) == TRUE) {
sigma2.fn <- function(sigma2) {
sigma.foreach.j <- sapply(1:p, FUN = function(j) {
sum(x[, j]^2 - 2*c(mean.w %*% x[, j] %*% z.beta.mean[j, ]))/(2*sig2) +
lambda*sum(sapply(1:r, FUN = function(k) {
foldednorm.mean(beta.mean[j, k], sig2 * beta.var[[j]][k,k])
})) - r*log(sig2)/2 + n*log(sig2)/2
})
obj.fn <- sum(sigma.foreach.j) + (sigma.a+1) * log(sig2) + sigma.b/sig2
}
sig2 <- 1/optimize(sigma2.fn, lower = 0, upper = 10)$minimum
}
#################### Rotate beta back to theta ###################
# first rotation obtain D
D <- MSE.w/n
chol.D <- t(chol(D))
beta.mean.tilde <- beta.mean %*% chol.D
theta.var <- lapply(1:p, function(j) {
chol.D %*% beta.var[[j]] %*% t(chol.D)
})
# theta.var = beta.var
# second rotation obtain A
beta.mean.tilde.svd <- svd(beta.mean.tilde)
A <- beta.mean.tilde.svd$u # obtain A
theta.mean <- A %*% beta.mean.tilde.svd$d
####################### obtain sparsity for theta #######################
z.theta <- z
################# obtain the objective function #####################
obj.fn1 <- sum((tcrossprod(theta.mean) - tcrossprod(theta.mean.old))^2)
obj.fn2 <- sum(abs(z.hat - z.hat.old))
obj.fn <- max(obj.fn1, obj.fn2)
######################### collect results ############################
selection.res[, 1,iter] <- z.theta
theta.res[, , iter] <- theta.mean * z.theta
obj.fn.res[iter] <- obj.fn
sig2.res[iter] <- sig2
if (obj.fn < eps) {
break
} else {
theta.mean.old <- theta.mean
z.hat.old <- z.hat
}
}
return(list(iter = iter, selection = selection.res[,,iter],
theta.mean = theta.res[,,iter], theta.var = theta.var,
sig2 = sig2, obj.fn = obj.fn.res[1:iter]))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.