Nothing
#' Supervised Integrated Factor Analysis
#'
#' @param X n*q matrix, centered covariate data
#' @param Y 1*K cell array, each cell is a n*pi centered primary
#' data, should roughly contribute equally to joint struct
#' @param r0 scalar, prespecified rank of common structure
#' @param r 1*K vector, prespecified rank of specific structures
#' @param relation 'linear' (default), use linear function to model
#' the relation between U and covariates
#' 'univ_kernel', use kernel methods for single covariates
#' @param sparsity 1 (default), when est B0 or B, use LASSO with BIC
#' to select the best tuning, suitable for high
#' @param max_niter default 1000, max number of iteration
#' @param convg_thres default 0.01, overall convergence threshold
#' @param type "A" or "Anp" or "B" or "Bnp", condition to use
#'
#' @return list with components
#' \item{B0:}{q*r0 matrix, coefficient for joint structure (may be sparse)}
#' \item{B:}{1*K cell array, each is a q*ri coefficient matrix (may be sparse)}
#' \item{V_joint:}{sum(p)*r0 matrix, stacked joint loadings, with orthonormal
#' columns.}
#'
#' \item{V_ind:}{1*K array, each is a pi*ri loading matrix, with
#' orthonormal columns}
#' \item{se2:}{1*K vector, noise variance for each phenotypic data set}
#' \item{Sf0:}{r0*r0 matrix, diagonal covariance matrix}
#' \item{Sf:}{1*K array, each is a ri*ri diagonal covariance matrix}
#' \item{EU:}{n*(r0+sum(ri)) matrix, conditional expectation of joint and individual scores}
#' @export
#'
#' @examples
#' \dontrun{
#' r0 <- 2
#' r <- c(3,3)
#' V <- matrix(stats::rnorm(10*2),10,2)
#' Fmatrix <- matrix(MASS::mvrnorm(n=1*500,rep(0,2),matrix(c(9,0,0,4),2,2)),500,2)
#' E <- matrix(stats::rnorm(500*10,0,3),500,10)
#' X <- tcrossprod(Fmatrix,V)+E
#' X <-scale(X,center=TRUE,scale=FALSE)
#' Y1 <- matrix(stats::rnorm(500*200,0,1),500,200)
#' Y2 <- matrix(stats::rnorm(500*200,0,1),500,200)
#' Y <- list(Y1,Y2)
#' SIFA(X,Y,r0,r,max_niter=200)
#' }
SIFA<- function(X,Y,r0,r,relation = 'linear', sparsity = 0,max_niter=1000, convg_thres=0.01, type ="Anp"){
#Check dimension
K <- length(Y)
p <- rep(0,K)
n <- nrow(X)
q <- ncol(X)
if(n <= q && sparsity==0 ){
warning("Must use sparse estimation to avoid overfitting!!!")
sparsity <- 1
}
for(k in 1:K){
n_temp <- nrow(Y[[k]])
p[k] <- ncol(Y[[k]])
if(n_temp != n){
stop("Sample mismatch!")
}
}
if(sum((r0+r)>min(n,p))){
stop("Too greedy on ranks!")
}
# initial value
grandY <- NULL
for(k in 1:K){
grandY <- cbind(grandY,Y[[k]])
}
grandU<-NULL
grandV<-NULL
#### A_np
if(type =="Anp"){
S <- RSpectra::svds(grandY,r0)
U_joint_ini <- S$u
D_joint_ini <- diag(S$d,nrow=length(S$d))
V_joint <- S$v
U_joint <- U_joint_ini%*%D_joint_ini
V_ind <- list()
Sf <- list()
loc3 <- 1
loc4 <- sum(p[1:1])
Vcurrent <- V_joint[loc3:loc4,]
Ycurrent <- Y[[1]]-tcrossprod(U_joint,Vcurrent)
Sk <- RSpectra::svds(Ycurrent,r[1])
U_k_ini <- Sk$u
D_k_ini <- diag(Sk$d,nrow=length(Sk$d))
V_k <- Sk$v
U_k <- U_k_ini%*%D_k_ini
V_ind[[1]] <- V_k
grandU <- cbind(grandU, U_k)
grandV <- pracma::blkdiag(V_ind[[1]])
for(k in 2:K){
loc3 <- sum(p[1:(k-1)])+1
loc4 <- sum(p[1:k])
Vcurrent <- V_joint[loc3:loc4,]
Ycurrent <- Y[[k]]-tcrossprod(U_joint,Vcurrent)
Sk <- RSpectra::svds(Ycurrent,r[k])
U_k_ini <- Sk$u
D_k_ini <- diag(Sk$d,nrow=length(Sk$d))
V_k <- Sk$v
U_k <- U_k_ini%*%D_k_ini
V_ind[[k]] <- V_k
grandU <- cbind(grandU, U_k)
grandV <- pracma::blkdiag(grandV,V_ind[[k]])
}
grandU <- cbind(U_joint, grandU)
grandV <- cbind(V_joint, grandV)
fX <- EstU_fun(X,grandU,relation,sparsity)
se2 <- rep(0,K)
loc1 <- r0+1
loc2 <- loc1 + r[1]-1
loc3 <- 1
loc4 <- sum(p[1:1])
U_k <- grandU[,loc1:loc2]
fkX <-fX[,loc1:loc2]
Sf[[1]] <- diag(matrixStats::colSds(U_k-fkX)^2)
temp <- tcrossprod(grandU,grandV)
se2[1] <- norm(Y[[1]]-temp[,loc3:loc4],"f")^2/(n*p[1])
for(k in 2:K){
loc1 <- r0+sum(r[1:(k-1)])+1
loc2 <- loc1 + r[k]-1
loc3 <- sum(p[1:(k-1)])+1
loc4 <- sum(p[1:k])
U_k <- grandU[,loc1:loc2]
fkX <-fX[,loc1:loc2]
Sf[[k]] <- diag(matrixStats::colSds(U_k-fkX)^2)
temp <- tcrossprod(grandU,grandV)
se2[k] <- norm(Y[[k]]-temp[,loc3:loc4],"f")^2/(n*p[k])
}
Sf0 <- diag(matrixStats::colSds(U_joint-fX[,1:r0])^2,nrow=length(matrixStats::colSds(U_joint-fX[,1:r0])^2))# a matrix
# disp('Initial est done!')
loglik <- loglikelihood1(Y,fX,V_joint,V_ind,se2,Sf0,Sf)
recloglik <- loglik
maxloglik <- loglik
niter <- 0
diff <- Inf
# diff_max <- inf
# tol_thres <- -10
recdiff <- list()
while(niter <= max_niter && abs(diff)>convg_thres){
cat(sprintf('This is the %.d th iterations, the diff= %.4g \n',niter,diff))
niter <- niter+1
# record last iter
grandV_old <- grandV
# E stop
# some critical values
grandse2 <- NULL # 1*sum(p)
grandSf0 <- t(diag(Sf0)) # 1*r0
grandSf <- NULL # 1*sum(r)
Delta_temp <- NULL # 1*sum(r)
for(k in 1:K){
grandse2 <- c(grandse2, rep(1,p[k])*(se2[k]))
grandSf <- c(grandSf, t(diag(Sf[[k]])))
Delta_temp <- c(Delta_temp, rep(1,r[k])*(1/se2[k]))
}
grandse2_inv <- 1/grandse2 #1*sum(p)
grandSf_inv <- 1/grandSf #1*sum(r)
grandSf0_inv <- 1/grandSf0 #1*r0
Delta1 <- pracma::arrayfun("*", t(grandV), grandse2_inv)%*%grandV #[r0+sum(r)]*[r0+sum(r)]
Delta2_inv <- solve(diag(c(grandSf0_inv, grandSf_inv))+Delta1)
temp <- grandV%*%tcrossprod(Delta2_inv,grandV)
SigmaY_inv <- diag(grandse2_inv)-pracma::arrayfun("*",pracma::arrayfun("*",temp,grandse2_inv),t(grandse2_inv))
VSigmaYinvV <- Delta1-Delta1%*%Delta2_inv%*%Delta1 #sum(p)*sum(p)
EU <- fX%*%(diag(sum(r)+r0)-pracma::arrayfun("*",VSigmaYinvV,c(grandSf0,grandSf)))+grandY%*%SigmaY_inv%*%pracma::arrayfun("*",grandV,c(grandSf0,grandSf))
covU <- diag(c(grandSf0,grandSf))-pracma::arrayfun("*",pracma::arrayfun("*",VSigmaYinvV,c(grandSf0,grandSf)),t(c(grandSf0,grandSf)))
# M step
# est V
# for iter 1 = 1:3 alternate between V_joint and V_ind
loc1 <- r0+1
loc2 <- loc1 + r[1]-1
loc3 <- 1
loc4 <- sum(p[1:1])
EU0 <- EU[,(1:r0)]
EUk <- EU[,(loc1:loc2)]
EUkU0 <- n*covU[(loc1:loc2),(1:r0)]+crossprod(EUk,EU0) # r(k)*r0
EU0U0 <- n*covU[(1:r0),(1:r0)]+crossprod(EU0, EU0) #r0*r0
Vk <- V_ind[[1]] #p(k)*r(k)
# V_0k
V_0k <- (crossprod(Y[[1]],EU0)-Vk%*%EUkU0)%*%solve(EU0U0)
V_joint[(loc3:loc4),] <- V_0k
# V_k
S <- RSpectra::svds(crossprod(EUk,Y[[1]])-tcrossprod(EUkU0,V_0k),r[1])
tempL <- S$u
tempR <- S$v
V_ind[[1]] <- tcrossprod(tempR,tempL)
for(k in 2:K){
loc1 <- r0+sum(r[1:(k-1)])+1
loc2 <- loc1 + r[k] -1
loc3 <- sum(p[1:(k-1)])+1
loc4 <- sum(p[1:k])
# critical value
EU0 <- EU[,(1:r0)]
EUk <- EU[,(loc1:loc2)]
EUkU0 <- n*covU[(loc1:loc2),(1:r0)]+crossprod(EUk,EU0) # r(k)*r0
EU0U0 <- n*covU[(1:r0),(1:r0)]+crossprod(EU0, EU0) #r0*r0
Vk <- V_ind[[k]] #p(k)*r(k)
# V_0k
V_0k <- (crossprod(Y[[k]],EU0)-Vk%*%EUkU0)%*%solve(EU0U0)
V_joint[(loc3:loc4),] <- V_0k
# V_k
S <- RSpectra::svds(crossprod(EUk,Y[[k]])-tcrossprod(EUkU0,V_0k),r[k])
tempL <- S$u
tempR <- S$v
V_ind[[k]] <- tcrossprod(tempR,tempL)
}
# end;
# est se2
loc1 <- r0+1
loc2 <- loc1 + r[1]-1
loc3 <- 1
loc4 <- sum(p[1:1])
covUcurrent <- covU[c(1:r0,loc1:loc2),c(1:r0,loc1:loc2)] #(r0 + r(k))*(r0+r(k))
Ycurrent <- Y[[1]]
EUcurrent <- cbind(EU[,(1:r0)], EU[,(loc1:loc2)])
Vcurrent <- cbind(V_joint[(loc3:loc4),],V_ind[[1]])
temp1 <- sum(diag(tcrossprod(Ycurrent,Ycurrent)))
temp2 <- 2*sum(diag(tcrossprod(tcrossprod(EUcurrent,Vcurrent),Ycurrent)))
temp3 <- n*sum(diag(crossprod(Vcurrent,Vcurrent)%*%covUcurrent))
temp4 <- sum(diag(crossprod(EUcurrent,EUcurrent)%*%crossprod(Vcurrent,Vcurrent)))
se2[1] <- (temp1-temp2+temp3+temp4)/(n*p[1])
for(k in 2:K){
loc1 <- r0+sum(r[1:(k-1)])+1
loc2 <- loc1 + r[k]-1
loc3 <- sum(p[1:(k-1)])+1
loc4 <- sum(p[1:k])
covUcurrent <- covU[c(1:r0,loc1:loc2),c(1:r0,loc1:loc2)] #(r0 + r(k))*(r0+r(k))
Ycurrent <- Y[[k]]
EUcurrent <- cbind(EU[,(1:r0)], EU[,(loc1:loc2)])
Vcurrent <- cbind(V_joint[(loc3:loc4),],V_ind[[k]])
temp1 <- sum(diag(tcrossprod(Ycurrent,Ycurrent)))
temp2 <- 2*sum(diag(tcrossprod(tcrossprod(EUcurrent,Vcurrent),Ycurrent)))
temp3 <- n*sum(diag(crossprod(Vcurrent,Vcurrent)%*%covUcurrent))
temp4 <- sum(diag(crossprod(EUcurrent,EUcurrent)%*%crossprod(Vcurrent,Vcurrent)))
se2[k] <- (temp1-temp2+temp3+temp4)/(n*p[k])
}
# est fX
fX <- EstU_fun(X,EU,relation,sparsity)
# est Sf0 and Sf
f0X <- fX[,(1:r0)]
EUcurrent <- EU[,(1:r0)]
covUcurrent <- covU[(1:r0),(1:r0)]
temp1 <- n*covUcurrent
temp2 <- crossprod(EUcurrent,EUcurrent)
temp3 <- crossprod(f0X,f0X)
temp4 <- crossprod(f0X,EUcurrent)
temp5 <- crossprod(EUcurrent,f0X)
#Sf0 <- (temp1+temp2+temp3-temp4-temp5)/n #questionable!!!
Sf0 <- diag(diag(temp1+temp2+temp3-temp4-temp5)/n,nrow = length(diag(temp1+temp2+temp3-temp4-temp5)/n)) #exactly follow the draft
loc1 <- r0+1
loc2 <- loc1+r[1]-1
fkX <- fX[,(loc1:loc2)]
EUcurrent <- EU[,(loc1:loc2)]
covUcurrent <- covU[(loc1:loc2),(loc1:loc2)]
temp1 <- n*covUcurrent
temp2 <- crossprod(EUcurrent,EUcurrent)
temp3 <- crossprod(fkX,fkX)
temp4 <- crossprod(fkX,EUcurrent)
temp5 <- crossprod(EUcurrent,fkX)
Sf[[1]] <- diag(diag((temp1+temp2+temp3-temp4-temp5)/n))
for(k in 2:K){
loc1 <- r0+sum(r[1:(k-1)])+1
loc2 <- loc1+r[k]-1
fkX <- fX[,(loc1:loc2)]
EUcurrent <- EU[,(loc1:loc2)]
covUcurrent <- covU[(loc1:loc2),(loc1:loc2)]
temp1 <- n*covUcurrent
temp2 <- crossprod(EUcurrent,EUcurrent)
temp3 <- crossprod(fkX,fkX)
temp4 <- crossprod(fkX,EUcurrent)
temp5 <- crossprod(EUcurrent,fkX)
Sf[[k]] <- diag(diag((temp1+temp2+temp3-temp4-temp5)/n))
}
# Post Standardization for V0 (and Sf0 and B0)
S1 <- RSpectra::svds(V_joint%*%tcrossprod(Sf0,V_joint),r0)
Sf0 <- diag(S1$d,nrow=length(S1$d))
V_joint_new <- S1$u
fX[,(1:r0)] <- fX[,(1:r0)]%*%crossprod(V_joint,V_joint_new)
V_joint <- V_joint_new
# reorder columns of B, V, and rows/columns of Sf
tempA <- sort(diag(Sf[[1]]), decreasing = T, index.return = T)
temp <- tempA$x
I <- tempA$ix
Sf[[1]] <- diag(temp)
loc1 <- r0+1
loc2 <- loc1+r[1]-1
temp <- fX[,(loc1:loc2)]
fX[,(loc1:loc2)] <- temp[,I]
V_ind[[1]] <- V_ind[[1]][,I]
for(k in 2:K){
tempA <- sort(diag(Sf[[k]]), decreasing = T, index.return = T)
temp <- tempA$x
I <- tempA$ix
Sf[[k]] <- diag(temp)
loc1 <- r0+sum(r[1:(k-1)])+1
loc2 <- loc1+r[k]-1
temp <- fX[,(loc1:loc2)]
fX[,(loc1:loc2)] <- temp[,I]
V_ind[[k]] <- V_ind[[k]][,I]
}
# get grandV
grandV <- pracma::blkdiag(V_ind[[1]])
for(k in 2:K){
grandV <- pracma::blkdiag(grandV,V_ind[[k]])
}
grandV <- cbind(V_joint,grandV) # sum(p)*(r0+sum(r))
# stopping rule
diff <- PrinAngle(grandV, grandV_old)
recdiff <- cbind(recdiff,diff)
# draw loglik trend
#loglik <- loglikelihood1(Y,fX,V_joint,V_ind,se2,Sf0,Sf)
#diff_max <- loglik-maxloglik #insurance, avoid likelihood decrease
#maxloglik <- max(maxloglik,loglik)
# check
#recloglik <- cbind(recloglik,loglik)
#plots
}
# calculate EU
grandSf0 <- t(diag(Sf0))
grandse2 <- rep(1,p[1])*se2[1]
grandSf <- t(diag(Sf[[1]]))
grandV_temp <- pracma::blkdiag(V_ind[[1]])
Delta_temp <- rep(1,r[1])*(1/se2[1])
for(k in 2:K){
grandse2 <- c(grandse2,rep(1,p[k])*se2[k])
grandSf <- c(grandSf, t(diag(Sf[[k]])))
grandV_temp <- pracma::blkdiag(grandV_temp, V_ind[[k]])
Delta_temp <- c(Delta_temp, rep(1,r[k])*(1/se2[k]))
}
grandse2_inv <- 1/grandse2 # 1*sum(p)
grandSf_inv <- 1/grandSf # 1*sum(r)
grandSf0_inv <- 1/grandSf0 # 1*r0
grandV <- cbind(V_joint, grandV_temp) # sum(p)*(r0+sum(r))
Delta1 <- pracma::arrayfun("*", t(grandV), grandse2_inv)%*%grandV # [r0+sum(r)]*[r0+sum(r)]
Delta2_inv <- solve(diag(c(grandSf0_inv,grandSf_inv))+Delta1)
temp <- grandV%*%tcrossprod(Delta2_inv,grandV) # sum(p)*sum(p)
SigmaY_inv <- diag(grandse2_inv)-pracma::arrayfun("*",pracma::arrayfun("*",temp,grandse2_inv),t(grandse2_inv))
# sum(p)*sum(p), not diagonal because of common structure, diff from SupSVD
VSigmaYinvV <- Delta1 - Delta1%*%Delta2_inv%*%Delta1 #(r0+sum(r))*(r0+sum(r))
EU <- fX%*%(diag(sum(r)+r0)-pracma::arrayfun("*",VSigmaYinvV,c(grandSf0,grandSf)))+
grandY%*%SigmaY_inv%*%pracma::arrayfun("*",grandV,c(grandSf0,grandSf))
# n*(r0+sum(r)), conditional mean
# Print convergence information
if(niter < max_niter){
cat("SIPCA_A1 converges after", pracma::num2str(niter), "iterations.")
}
else{
cat("SIPCA_A1 NOT converge after", pracma::num2str(max_niter), "iterations!!! Final change in angle:", pracma::num2str(diff))
}
return(list(V_joint=V_joint, V_ind=V_ind, se2=se2, Sf0=Sf0, Sf=Sf, EU=EU, fX=fX))
}
#### B_np
else if(type == "Bnp"){
S1 <- RSpectra::svds(grandY,r0) #note: segments of V_joint_ini correspond to each data set may not be orthogonal as desired.
U_joint <- S1$u
D_joint <- diag(S1$d,nrow=length(S1$d))
V_joint_ini <- S1$v
U_joint <- U_joint%*%D_joint
V_ind <- list()
Sf <- list()
loc3 <- 1
loc4 <- sum(p[1:1])
Ycurrent <- Y[[1]]-tcrossprod(U_joint,V_joint_ini[(loc3:loc4),])
S2 <- RSpectra::svds(Ycurrent,r[1])
U_k_ini <- S2$u
D_k_ini <- diag(S2$d,nrow=length(S2$d))
V_k_ini <- S2$v
U_k <- U_k_ini%*%D_k_ini
V_ind[[1]] <- V_k_ini
grandU <- cbind(grandU,U_k)
grandV <- pracma::blkdiag(V_ind[[1]])
for(k in 2:K){
loc3 <- sum(p[1:(k-1)])+1
loc4 <- sum(p[1:k])
Ycurrent <- Y[[k]]-tcrossprod(U_joint,V_joint_ini[(loc3:loc4),])
S2 <- RSpectra::svds(Ycurrent,r[k])
U_k_ini <- S2$u
D_k_ini <- diag(S2$d,nrow=length(S2$d))
V_k_ini <- S2$v
U_k <- U_k_ini%*%D_k_ini
V_ind[[k]] <- V_k_ini
grandU <- cbind(grandU,U_k)
grandV <- pracma::blkdiag(grandV,V_ind[[k]])
}
# postprocess V_joint_ini such that it follows our identifiability condition
V_joint <- matrix(0,sum(p),r0)
loc3 <- 1
loc4 <- sum(p[1:1])
V_joint_k <- GramSchmidt(as.matrix(V_joint_ini[loc3:loc4,]),V_ind[[1]])$Q1
V_joint[loc3:loc4,] <- V_joint_k*(1/sqrt(K))
for(k in 2:K){
loc3 <- sum(p[1:(k-1)])+1
loc4 <- sum(p[1:k])
V_joint_k <- GramSchmidt(as.matrix(V_joint_ini[loc3:loc4,]),V_ind[[k]])$Q1
V_joint[loc3:loc4,] <- V_joint_k*(1/sqrt(K))
}
grandV <- cbind(V_joint,grandV)
grandU <- cbind(U_joint,grandU)
fX <- EstU_fun(X,grandU,relation,sparsity)
se2 <- rep(0,K)
loc1 <- r0+1
loc2 <- loc1+r[1]-1
loc3 <- 1
loc4 <- sum(p[1:1])
U_k <- grandU[,loc1:loc2]
fkX <- fX[,loc1:loc2]
Sf[[1]] <- diag(apply((U_k-fkX)^2,2,stats::sd))
temp <- tcrossprod(grandU, grandV)
se2[1] <- (norm(Y[[1]]-temp[,loc3:loc4],type = "F")%*%norm(Y[[1]]-temp[,loc3:loc4],type = "F"))%*%solve(n*p[1])
for(k in 2:K){
loc1 <- r0+sum(r[1:(k-1)])+1
loc2 <- loc1+r[k]-1
loc3 <- sum(p[1:(k-1)])+1
loc4 <- sum(p[1:k])
U_k <- grandU[,loc1:loc2]
fkX <- fX[,loc1:loc2]
Sf[[k]] <- diag(apply((U_k-fkX)^2,2,stats::sd))
temp <- tcrossprod(grandU, grandV)
se2[k] <- (norm(Y[[k]]-temp[,loc3:loc4],type = "F")%*%norm(Y[[k]]-temp[,loc3:loc4],type = "F"))%*%solve(n*p[k])
}
Sf0 <- diag(apply((U_joint-fX[,1:r0])^2,2,stats::sd),nrow=length(apply((U_joint-fX[,1:r0])^2,2,stats::sd)))
#disp('Initial est done!')
loglik <- loglikelihood1(Y,fX,V_joint,V_ind,se2,Sf0,Sf)
recloglik <- loglik
maxloglik <- loglik
niter <- 0
diff = Inf
recdiff <- NULL
while(niter<=max_niter && abs(diff)>convg_thres){
# && diff_max > tol_thres
cat(sprintf('This is the %.d th iterations, the diff= %.4g \n',niter,diff))
niter <- niter+1
#record last iter
grandV_old <- grandV
# E step
# some critical values
grandse2 <- NULL #1*sum(p)
grandSf <- t(diag(Sf0)) #1*(r0+sum(r))
Delta_temp <- rep(1,r0)*sum(se2^(-1))/K
for(k in 1:K){
grandse2 <- c(grandse2,(rep(1,p[k])*se2[k]))
grandSf <- c(grandSf,t(diag(Sf[[k]])))
Delta_temp <- c(Delta_temp,rep(1,r[k])*(1/se2[k]))
}
grandse2_inv <- 1/grandse2 #1*sum(p)
grandSf_inv <- 1/grandSf #1*(r0+sum(r))
Delta1 <- Delta_temp
# bsxfun("*",t(grandV), grandse2_inv)%*%grandV # 1*(r0+sum(r))
Delta2 <- grandSf_inv+Delta1 #1*(r0+sum(r))
temp <- pracma::arrayfun("*", grandV, (1/Delta2))%*%t(grandV) #sum(p)*sum(p)
SigmaY_inv <- diag(grandse2_inv)-pracma::arrayfun("*",pracma::arrayfun("*", temp, grandse2_inv), t(grandse2_inv))#sum(p)*sum(p), not diagonal because of common structure, diff from SupSVD
VSigmaYinvV <- Delta1 - Delta1*(1/Delta2)*Delta1 #1*(r0+sum(r))
EU <- fX%*%diag(rep(1,sum(r)+r0)-VSigmaYinvV*grandSf)+grandY%*%SigmaY_inv%*%pracma::arrayfun("*", grandV, grandSf) #conditional mean
covU <- grandSf - grandSf*VSigmaYinvV*grandSf#1*(r0+sum(r)), conditional variance (turns out to be a diagonal matrix)
# M step
# est V and se2
loc1 <- r0+1
loc2 <- loc1+r[1]-1
loc3 <- 1
loc4 <- sum(p[1:1])
Ycurrent <- Y[[1]]
EUcurrent <- cbind(EU[,1:r0], EU[,loc1:loc2])
EUcurrent_star <- cbind((1/sqrt(K))*EU[,1:r0],EU[,loc1:loc2])
# V
S3 <- RSpectra::svds(crossprod(Ycurrent,EUcurrent_star),(r0+r[1]))
tempL <- S3$u
tempR <- S3$v
Vcurrent_star <- tcrossprod(tempL,tempR) # should have orthonormal columns
V_joint[loc3:loc4,] <- Vcurrent_star[,1:r0]*(1/sqrt(K))
V_ind[[1]] <- Vcurrent_star[,(r0+1):(r0+r[1])]
Vcurrent <- cbind(V_joint[loc3:loc4,],V_ind[[1]])
# clear EUcurrent_star Vcurrent_star
# se2 (directly from SupSVD formula)
covUcurrent <- covU[c((1:r0),(loc1:loc2)),drop = "FLASE"]#1*(r0+r(k))
temp1 <- sum(diag(tcrossprod(Ycurrent,Ycurrent)))
temp2 <- 2*sum(diag(tcrossprod(EUcurrent,Vcurrent)%*%t(Ycurrent)))
temp3 <- n*sum(diag(crossprod(Vcurrent,Vcurrent)%*%diag(covUcurrent)))
temp4 <- sum(diag(crossprod(EUcurrent,EUcurrent)%*%crossprod(Vcurrent,Vcurrent)))
se2[1] <- (temp1-temp2+temp3+temp4)/(n*p[1])
for(k in 2:K){
loc1 <- r0+sum(r[1:(k-1)])+1
loc2 <- loc1+r[k]-1
loc3 <- sum(p[1:(k-1)])+1
loc4 <- sum(p[1:k])
Ycurrent <- Y[[k]]
EUcurrent <- cbind(EU[,1:r0], EU[,loc1:loc2])
EUcurrent_star <- cbind((1/sqrt(K))*EU[,1:r0],EU[,loc1:loc2])
# V
S3 <- RSpectra::svds(crossprod(Ycurrent,EUcurrent_star),(r0+r[k]))
tempL <- S3$u
tempR <- S3$v
Vcurrent_star <- tcrossprod(tempL,tempR) # should have orthonormal columns
V_joint[loc3:loc4,] <- Vcurrent_star[,1:r0]*(1/sqrt(K))
V_ind[[k]] <- Vcurrent_star[,(r0+1):(r0+r[k])]
Vcurrent <- cbind(V_joint[loc3:loc4,],V_ind[[k]])
# clear EUcurrent_star Vcurrent_star
# se2 (directly from SupSVD formula)
covUcurrent <- covU[c((1:r0),(loc1:loc2)),drop = "FLASE"]#1*(r0+r(k))
temp1 <- sum(diag(tcrossprod(Ycurrent,Ycurrent)))
temp2 <- 2*sum(diag(tcrossprod(EUcurrent,Vcurrent)%*%t(Ycurrent)))
temp3 <- n*sum(diag(crossprod(Vcurrent,Vcurrent)%*%diag(covUcurrent)))
temp4 <- sum(diag(crossprod(EUcurrent,EUcurrent)%*%crossprod(Vcurrent,Vcurrent)))
se2[k] <- (temp1-temp2+temp3+temp4)/(n*p[k])
}
# est fX
fX <- EstU_fun(X, EU,relation, sparsity)# estimate an n*(r0+sum(ri)) matrix
# est Sf
f0X <- fX[,1:r0]
EUcurrent <- EU[,1:r0]
covUcurrent <- covU[1:r0,drop = "FLASE"]
temp1 <- n*diag(covUcurrent,nrow=length(covUcurrent))
temp2 <- crossprod(EUcurrent,EUcurrent)
temp3 <- crossprod(f0X,EUcurrent)
temp4 <- crossprod(f0X,EUcurrent)
temp5 <- crossprod(EUcurrent,f0X)
Sf0 <- diag(diag(temp1+temp2+temp3-temp4-temp5)/n,nrow=length(diag(temp1+temp2+temp3-temp4-temp5)/n))
loc1 <- r0+1
loc2 <- loc1+r[1]-1
fkX <- fX[,loc1:loc2]
EUcurrent <- EU[,loc1:loc2]
covUcurrent <- covU[loc1:loc2]
temp1 <- n*diag(covUcurrent)
temp2 <- crossprod(EUcurrent,EUcurrent)
temp3 <- crossprod(fkX, fkX)
temp4 <- crossprod(fkX,EUcurrent)
temp5 <- crossprod(EUcurrent,fkX)
Sf[[1]] <- diag(diag((temp1+temp2+temp3-temp4-temp5)/n))
for(k in 2:K){
loc1 <- r0+sum(r[1:(k-1)])+1
loc2 <- loc1+r[k]-1
fkX <- fX[,loc1:loc2]
EUcurrent <- EU[,loc1:loc2]
covUcurrent <- covU[loc1:loc2]
temp1 <- n*diag(covUcurrent)
temp2 <- crossprod(EUcurrent,EUcurrent)
temp3 <- crossprod(fkX, fkX)
temp4 <- crossprod(fkX,EUcurrent)
temp5 <- crossprod(EUcurrent,fkX)
Sf[[k]] <- diag(diag((temp1+temp2+temp3-temp4-temp5)/n))
# reorder columns of B, V, and rows/columns of Sf
tempA <- sort(diag(Sf[[k]]), decreasing = T, index.return = T)
temp <- tempA$x
I <- tempA$ix
Sf[[k]] <- diag(temp)
loc1 <- r0+sum(r[1:(k-1)])+1
loc2 <- loc1+r[k]-1
temp <- fX[,(loc1:loc2)]
fX[,(loc1:loc2)] <- temp[,I]
V_ind[[k]] <- V_ind[[k]][,I]
}
#calc grandV
grandV <- NULL
grandV <- pracma::blkdiag(V_ind[[1]])
for(k in 2:K){
grandV <- pracma::blkdiag(grandV, V_ind[[k]])
}
grandV <- cbind(V_joint,grandV) #sum(p)*(r0+sum(r))
#stopping rule
diff <- PrinAngle(grandV, grandV_old)
recdiff <- cbind(recdiff,diff)
#draw loglik trend
loglik <- loglikelihood1(Y, fX, V_joint,V_ind,se2, Sf0,Sf)
diff_max <- loglik-maxloglik #insurance, avoid likelihood decrease
maxloglik <- max(maxloglik,loglik)
#check
recloglik <- cbind(recloglik, loglik)
#par(mfrow=c(2,1))
#plot(recloglik,type = "b",main = "likelihood")
#plot(recdiff, type = "b", main = paste("angle=",pracma::num2str(diff)))
}
# calc EU
grandse2 <- NULL#1*sum(p)
grandSf <- t(diag(Sf0)) #1*(r0+sum(r))
grandV_temp <- NULL #combination of loadings: first few long columns are joint loadings, subsequent diagonal blocks are individual loadings
Delta_temp <- rep(1,r0)*sum(se2^(-1))/K
grandse2 <- c(grandse2,(rep(1,p[1])*se2[1]))
grandSf <- c(grandSf, t(diag(Sf[[1]])))
grandV_temp <- pracma::blkdiag(V_ind[[1]])
Delta_temp <- c(Delta_temp,rep(1,r[1])*(1/se2[1]))
for(k in 2:K){
grandse2 <- c(grandse2,(rep(1,p[k])*se2[k]))
grandSf <- c(grandSf, t(diag(Sf[[k]])))
grandV_temp <- pracma::blkdiag(grandV_temp,V_ind[[k]])
Delta_temp <- c(Delta_temp,rep(1,r[k])*(1/se2[k]))
}
grandse2_inv <- 1/grandse2 #1*sum(p)
grandSf_inv <- 1/grandSf #1*(r0+sum(r))
grandV <- cbind(V_joint,grandV_temp) #sum(p)*(r0+sum(r))
Delta1 <- Delta_temp #bsxfun("*", t(grandV), grandse2_inv)%*%grandV #1*(r0+sum(r))
Delta2 <- grandSf_inv+Delta1 #1*(r0+sum(r))
temp <- pracma::arrayfun("*", grandV, (1/Delta2))%*%t(grandV) #sum(p)*sum(p)
SigmaY_inv <- diag(grandse2_inv)-pracma::arrayfun("*", pracma::arrayfun("*",temp,grandse2_inv), t(grandse2_inv)) #sum(p)*sum(p), not diagonal because of common structure, diff from SupSVD
VSigmaYinV <- Delta1-Delta1*(1/Delta2)*Delta1 #1*(r0+sum(r))
EU <- fX%*%diag(rep(1,sum(r)+r0)-VSigmaYinV*grandSf)+grandY%*%SigmaY_inv%*%pracma::arrayfun("*", grandV,grandSf) #conditional mean
#Print convergence information
if(niter<max_niter){
cat("SIPCA_B1 converges after",pracma::num2str(niter),"iterations.")
}else{
cat("SIPCA_B1 NOT converge after", pracma::num2str(max_niter),"iterations!!! Final change in angle:",pracma::num2str(diff))
}
return(list(V_joint=V_joint, V_ind = V_ind, se2 = se2, Sf0 = Sf0, Sf = Sf, EU = EU, fX=fX))
}
###AAA
else if(type=="A"){
S <- RSpectra::svds(grandY,r0)
U_joint_ini <- S$u
D_joint_ini <- diag(S$d,nrow=length(S$d))
V_joint <- S$v
# note: segments of V_joint_ini corresponding to each data set may not be orthogonal as desired
U_joint <- U_joint_ini%*%D_joint_ini
if(sparsity==1){
B0 <- matrix(0,q,r0)
for(i in 1:r0){
fit <- glmnet::glmnet(X, U_joint[,i], alpha = 1, lambda.min.ratio = 0, standardize = TRUE)
BIC_score <- n*stats::deviance(fit)+log(n)*fit$df
ind <- which(BIC_score == min(BIC_score))
B0[,i] <- fit$beta[,ind]
}
}else{# if sparsity = 0, no B-sparsity
B0 <- solve(crossprod(X,X), t(X))%*%U_joint
}
Sf0 <- diag(apply((U_joint-X%*%B0)^2,2,stats::sd),nrow=length(apply((U_joint-X%*%B0)^2,2,stats::sd)))
V_ind <- list()
B <- list()
for(k in 1:K){
B[[k]] <- matrix(0,q,r[k])
}
Sf <- list()
loc3 <- 1
loc4 <- sum(p[1:1])
Vcurrent <- V_joint[(loc3:loc4),]
Ycurrent <- Y[[1]]-tcrossprod(U_joint,Vcurrent)
S2 <- RSpectra::svds(Ycurrent,r[1])
U_k_ini <- S2$u
D_k_ini <- diag(S2$d,nrow=length(S2$d))
V_k <- S2$v
U_k <- U_k_ini%*%D_k_ini
V_ind[[1]] <- V_k
if (sparsity==1){
for(i in 1:r[1]){
fit <- glmnet::glmnet(X, U_k[,i], alpha = 1, lambda.min.ratio = 0, standardize = TRUE)
BIC_score <- n*stats::deviance(fit)+log(n)*fit$df
ind <- which(BIC_score == min(BIC_score))
B[[1]][,i] <- fit$beta[,ind]
}
}else{# if sparsity = 0, no B-sparsity
B[[1]] <- solve(crossprod(X,X),t(X))%*%U_k
}
Sf[[1]] <- diag(apply((U_k - X%*%B[[1]])^2,2,stats::sd))
se2 <- rep(0,K)
se2[1] <- norm(Y[[k]]-tcrossprod(U_k,V_k)-tcrossprod(U_joint,Vcurrent), type = "F")^2/(n*p[1])
for(k in 2:K){
loc3 <- sum(p[1:(k-1)])+1
loc4 <- sum(p[1:k])
Vcurrent <- V_joint[(loc3:loc4),]
Ycurrent <- Y[[k]]-tcrossprod(U_joint,Vcurrent)
S2 <- RSpectra::svds(Ycurrent,r[k])
U_k_ini <- S2$u
D_k_ini <- diag(S2$d,nrow=length(S2$d))
V_k <- S2$v
U_k <- U_k_ini%*%D_k_ini
V_ind[[k]] <- V_k
if (sparsity==1){
for(i in 1:r[k]){
fit <- glmnet::glmnet(X, U_k[,i], alpha = 1, lambda.min.ratio = 0, standardize = TRUE)
BIC_score <- n*stats::deviance(fit)+log(n)*fit$df
ind <- which(BIC_score == min(BIC_score))
B[[k]][,i] <- fit$beta[,ind]
}
}else{# if sparsity = 0, no B-sparsity
B[[k]] <- solve(crossprod(X,X),t(X))%*%U_k
}
Sf[[k]] <- diag(apply((U_k - X%*%B[[k]])^2,2,stats::sd))
se2[k] <- norm(Y[[k]]-tcrossprod(U_k,V_k)-tcrossprod(U_joint,Vcurrent), type = "F")^2/(n*p[k])
}
grandV <- NULL
grandV <- pracma::blkdiag(V_ind[[1]])
for (k in 2:K){
grandV <- pracma::blkdiag(grandV,V_ind[[k]])
}
grandV <- cbind(V_joint, grandV) # sum(p)*(r0+sum(r))
#disp('Initial set done!')
loglik <- loglikelihood(X,Y,B0,B,V_joint,V_ind,se2,Sf0,Sf)
recloglik <- loglik
maxloglik <- loglik
niter <- 0
diff <- Inf
diff_max <- Inf
tol_thres <- -10
recdiff <- NULL
while(niter<=max_niter && abs(diff)>convg_thres)#&& diff_max>tol_thres
{
cat(sprintf('This is the %.d th iterations, the diff= %.4g \n',niter,diff))
niter <- niter + 1
#record last iter
#loglik_old <- loglik
#V_joint_old <- V_joint
grandV_old <-grandV
# E step
# some critical values
grandse2 <- NULL #1*sum(p)
grandSf0 <- t(diag(Sf0)) # 1*r0
grandSf <- NULL #1*sum(r)
#grandV_temp #combination of loadings: first few long columns are joint loadings, subsequent diagonal blocks are individual loadings
grandB_temp <- NULL #concatenate B
Delta_temp <- NULL #1*sum(r)
for(k in 1:K){
grandse2 <- c(grandse2,(rep(1,p[k])*se2[k]))
grandSf <- c(grandSf,t(diag(Sf[[k]])))
# grandV_temp = pracma::blkdiag(grandV_temp, V_ind[[k]])
grandB_temp <- cbind(grandB_temp, B[[k]])
Delta_temp <- c(Delta_temp, rep(1,r[k])*(1/se2[k]))
}
grandse2_inv <- 1/grandse2 #1*sum(p)
grandSf_inv <- 1/grandSf # 1*sum(r)
grandSf0_inv <- 1/grandSf0 #1*r0
# grandV = cbind(V_joint, grandV_temp) #sum(p)*(r0+sum(r))
grandB <- cbind(B0,grandB_temp) # q*(r0+sum(r))
Delta1 <- pracma::arrayfun("*", t(grandV), grandse2_inv)%*%grandV #[r0+sum(r)]*[r0+sum(r)]
Delta2_inv <- solve(diag(c(grandSf0_inv,grandSf_inv))+Delta1)
temp <- grandV%*%tcrossprod(Delta2_inv,grandV)
SigmaY_inv <- diag(grandse2_inv) - pracma::arrayfun("*", pracma::arrayfun("*",temp, grandse2_inv), t(grandse2_inv)) #sum(p)*sum(p), not diagonal because of common structure, diff from SupSVD
VSigmaYinvV <- Delta1 - Delta1%*%Delta2_inv%*%Delta1 #(r0+sum(r))*(r0+sum(r))
EU <- X%*%grandB%*%(diag(sum(r)+r0)-pracma::arrayfun("*", VSigmaYinvV,c(grandSf0,grandSf)))+grandY%*%SigmaY_inv%*%pracma::arrayfun("*", grandV, c(grandSf0, grandSf)) # n*(r0+sum(r)), conditional mean
covU <- diag(c(grandSf0,grandSf))-pracma::arrayfun("*", pracma::arrayfun("*", VSigmaYinvV, c(grandSf0,grandSf)), t(c(grandSf0, grandSf))) #(r0+sum(r))*(r0+sum(r))
# M step
# V and se2
# for iter = 1:3 # alternate between V_joint and V_ind
loc1 <- r0 + +1
loc2 <- loc1 + r[1]-1
loc3 <- 1
loc4 <- sum(p[1:1])
# critical values
EU0 <- EU[,(1:r0)]
EUk <- EU[,(loc1:loc2)]
EUkU0 <- n*covU[(loc1:loc2), (1:r0)]+crossprod(EUk, EU0) #r(k)*r0
EU0U0 <- n*covU[(1:r0), (1:r0)]+crossprod(EU0,EU0) #r0*r0
Vk <- V_ind[[1]]
#V_0k
V_0k <- (crossprod(Y[[1]], EU0)-Vk%*%EUkU0)%*%solve(EU0U0)
V_joint[(loc3:loc4),] <- V_0k
#V_k
S3 <- RSpectra::svds(crossprod(EUk, Y[[1]])-tcrossprod(EUkU0, V_0k),r[1])
V_ind[[1]] <- tcrossprod(S3$v, S3$u)
for(k in 2:K){
loc1 <- r0 + sum(r[1:(k-1)])+1
loc2 <- loc1 + r[k]-1
loc3 <- sum(p[1:(k-1)])+1
loc4 <- sum(p[1:k])
# critical values
EU0 <- EU[,(1:r0)]
EUk <- EU[,(loc1:loc2)]
EUkU0 <- n*covU[(loc1:loc2), (1:r0)]+crossprod(EUk, EU0) #r(k)*r0
EU0U0 <- n*covU[(1:r0), (1:r0)]+crossprod(EU0,EU0) #r0*r0
Vk <- V_ind[[k]]
#V_0k
V_0k <- (crossprod(Y[[k]], EU0)-Vk%*%EUkU0)%*%solve(EU0U0)
V_joint[(loc3:loc4),] <- V_0k
#V_k
S3 <- RSpectra::svds(crossprod(EUk, Y[[k]])-tcrossprod(EUkU0, V_0k),r[k])
V_ind[[k]] <- tcrossprod(S3$v, S3$u)
}
# est se2
loc1 <- r0+1
loc2 <- loc1 + r[1]-1
loc3 <- 1
loc4 <- sum(p[1:1])
covUcurrent <- covU[c(1:r0, loc1:loc2), c(1:r0, loc1:loc2)] #(r0+r(k))*(r0+r(k))
Ycurrent <- Y[[1]]
EUcurrent <- cbind(EU[,(1:r0)], EU[,(loc1:loc2)])
Vcurrent <- cbind(V_joint[(loc3:loc4),], V_ind[[1]])
temp1 <- sum(diag(tcrossprod(Ycurrent, Ycurrent)))
temp2 <- 2*sum(diag(tcrossprod(tcrossprod(EUcurrent,Vcurrent),Ycurrent)))
temp3 <- n*sum(diag(crossprod(Vcurrent,Vcurrent)%*%covUcurrent))
temp4 <- sum(diag(crossprod(EUcurrent,EUcurrent)%*%crossprod(Vcurrent,Vcurrent)))
se2[1] <- (temp1-temp2+temp3+temp4)/(n*p[1])
for(k in 2:K){
loc1 <- r0+sum(r[1:(k-1)])+1
loc2 <- loc1 + r[k]-1
loc3 <- sum(p[1:(k-1)])+1
loc4 <- sum(p[1:k])
covUcurrent <- covU[c(1:r0, loc1:loc2), c(1:r0, loc1:loc2)] #(r0+r(k))*(r0+r(k))
Ycurrent <- Y[[k]]
EUcurrent <- cbind(EU[,(1:r0)], EU[,(loc1:loc2)])
Vcurrent <- cbind(V_joint[(loc3:loc4),], V_ind[[k]])
temp1 <- sum(diag(tcrossprod(Ycurrent, Ycurrent)))
temp2 <- 2*sum(diag(tcrossprod(tcrossprod(EUcurrent,Vcurrent),Ycurrent)))
temp3 <- n*sum(diag(crossprod(Vcurrent,Vcurrent)%*%covUcurrent))
temp4 <- sum(diag(crossprod(EUcurrent,EUcurrent)%*%crossprod(Vcurrent,Vcurrent)))
se2[k] <- (temp1-temp2+temp3+temp4)/(n*p[k])
}
# B and Sf
EUcurrent <- EU[,(1:r0)]
if (sparsity==1){
for(i in 1:r0){
fit <- glmnet::glmnet(X,EUcurrent[,i],lambda.min.ratio = 0, standardize = TRUE)
BIC_score <- n*stats::deviance(fit)+log(n)*fit$df
ind <- which(BIC_score == min(BIC_score))
B0[,i] <- fit$beta[,ind]
}
}else{
B0 <- solve(crossprod(X,X),t(X))%*%EUcurrent
}
covUcurrent <- covU[(1:r0), (1:r0)]
temp1 <- n*covUcurrent
temp2 <- crossprod(EUcurrent,EUcurrent)
temp3 <- crossprod(B0, crossprod(X,X))%*%B0
temp4 <- t(B0)%*%crossprod(X,EUcurrent)
temp5 <- crossprod(EUcurrent, X)%*%B0
Sf0 <- (temp1+temp2+temp3-temp4-temp5)/n #not diagonal
loc1 <- r0+1
loc2 <- loc1+r[1]-1
EUcurrent <- EU[,(loc1:loc2)]
if(sparsity==1){
for(i in 1:r[1]){
fit <- glmnet::glmnet(X,EUcurrent[,i],lambda.min.ratio = 0, standardize = TRUE)
BIC_score <- n*stats::deviance(fit)+log(n)*fit$df
ind <- which(BIC_score == min(BIC_score))
B[[1]][,i] <- fit$beta[,ind]
}
}else{ # no B-sparsity
B[[1]] <- solve(crossprod(X,X),t(X))%*%EUcurrent
}
covUcurrent <- covU[(loc1:loc2), (loc1:loc2)]
temp1 <- n*covUcurrent
temp2 <- crossprod(EUcurrent,EUcurrent)
temp3 <- crossprod(B[[1]], crossprod(X,X))%*%B[[1]]
temp4 <- t(B[[1]])%*%crossprod(X,EUcurrent)
temp5 <- crossprod(EUcurrent, X)%*%B[[1]]
Sf[[1]] <- diag(diag((temp1+temp2+temp3-temp4-temp5)/n))
for(k in 2:K){
loc1 <- r0+sum(r[1:(k-1)])+1
loc2 <- loc1+r[k]-1
EUcurrent <- EU[,(loc1:loc2)]
if(sparsity==1){
for(i in 1:r[k]){
fit <- glmnet::glmnet(X,EUcurrent[,i],lambda.min.ratio = 0, standardize = TRUE)
BIC_score <- n*stats::deviance(fit)+log(n)*fit$df
ind <- which(BIC_score == min(BIC_score))
B[[k]][,i] <- fit$beta[,ind]
}
}else{ # no B-sparsity
B[[k]] <- solve(crossprod(X,X),t(X))%*%EUcurrent
}
covUcurrent <- covU[(loc1:loc2), (loc1:loc2)]
temp1 <- n*covUcurrent
temp2 <- crossprod(EUcurrent,EUcurrent)
temp3 <- crossprod(B[[k]], crossprod(X,X))%*%B[[k]]
temp4 <- t(B[[k]])%*%crossprod(X,EUcurrent)
temp5 <- crossprod(EUcurrent, X)%*%B[[k]]
Sf[[k]] <- diag(diag((temp1+temp2+temp3-temp4-temp5)/n))
}
#Post Standardization for V0 (and Sf0 and B0)
S4 <- RSpectra::svds(V_joint%*%tcrossprod(Sf0, V_joint),r0)
V_joint_new <- S4$u
Sf0 <- diag(S4$d,nrow=length(S4$d))
V_joint_new <- pracma::arrayfun("*", V_joint_new, sign(V_joint_new[1,])) # make sure no sign alternation
B0 <- tcrossprod(B0,V_joint)%*%V_joint_new #will sacrifice any sparsity in B0
V_joint <- V_joint_new #reorder columns of B, V, and rows/columns of Sf
for(k in 1:K){
tempA <- sort(diag(Sf[[k]]), decreasing = T, index.return = T)
temp <- tempA$x
I <- tempA$ix
Sf[[k]] <- diag(temp)
B[[k]] <- B[[k]][,I]
V_ind[[k]] <- V_ind[[k]][,I]
}
# get grandV
grandV <- pracma::blkdiag(V_ind[[1]])
for(k in 2:K){
grandV <- pracma::blkdiag(grandV, V_ind[[k]])
}
grandV <- cbind(V_joint, grandV) #sum(p)*(r0+sum(r))
#Stopping rule
diff <- PrinAngle(grandV,grandV_old)
recdiff <- cbind(recdiff,diff)
#draw loglik trend
loglik <- loglikelihood(X,Y,B0,B,V_joint,V_ind,se2,Sf0,Sf)
diff_max <- loglik-maxloglik # insurance, avoid likelihood decrease
maxloglik <- max(maxloglik,loglik)
}
#calculate EU
grandse2 <- NULL #1*sum(p)
grandSf0 <- t(diag(Sf0)) #1*r0, r0>1
grandSf <- NULL#1*sum(r)
grandV_temp <- NULL #combination of loadings: first few long columns are joint loadings, subsequent diagonal blocks are individual loadings.
grandB_temp <- NULL #concatenate B
Delta_temp <- NULL #1*sum(r)
grandse2 <- (rep(1,p[1])*se2[1])
grandSf <- t(diag(Sf[[1]]))
grandV_temp <- pracma::blkdiag(V_ind[[1]])
grandB_temp <- B[[1]]
Delta_temp <- rep(1,r[1])*(1/se2[1])
for(k in 2:K){
grandse2 <- c(grandse2, (rep(1,p[k])*se2[k]))
grandSf <- c(grandSf, t(diag(Sf[[k]])))
grandV_temp <- pracma::blkdiag(grandV_temp,V_ind[[k]])
grandB_temp <- cbind(grandB_temp,B[[k]])
Delta_temp <- c(Delta_temp,rep(1,r[k])*(1/se2[k]))
}
grandse2_inv <- 1/grandse2 #1*sum(p)
grandSf_inv <- 1/grandSf #1*sum(r)
grandSf0_inv <- 1/grandSf0 #1*r0
grandV <- cbind(V_joint, grandV_temp) #sum(p)*(r0+sum(r))
grandB <- cbind(B0,grandB_temp) #q*(r0+sum(r))
Delta1 <- pracma::arrayfun("*", t(grandV), grandse2_inv)%*%grandV # [r0+sum(r)]*[r0+sum(r)]
Delta2_inv <- solve(diag(c(grandSf0_inv,grandSf_inv))+Delta1)
temp <- grandV%*%tcrossprod(Delta2_inv,grandV) #sum(p)*sum(p)
SigmaY_inv <- diag(grandse2_inv)-pracma::arrayfun("*",pracma::arrayfun("*",temp,grandse2_inv),t(grandse2_inv)) #sum(p)*sum(p), not diagonal because of common structure, diff from SupSVD
VSigmaYinvV <- Delta1 - Delta1%*%Delta2_inv%*%Delta1 #(r0+sum(r))*(r0+sum(r))
EU <- X%*%grandB%*%(diag(sum(r)+r0)-pracma::arrayfun("*",VSigmaYinvV,c(grandSf0,grandSf)))+grandY%*%SigmaY_inv%*%pracma::arrayfun("*",grandV,c(grandSf0,grandSf)) #n*(r0+sum(r)), conditional mean
#Print convergence information
if(niter < max_niter){
cat("SIPCA_A converges after",pracma::num2str(niter), "iterations.")
}else{
cat("SIPCA_A NOT converge after", pracma::num2str(max_niter), "iterations!!! Final change in angle:",pracma::num2str(diff))
}
return(list(B0=B0,B=B,V_join=V_joint,V_ind=V_ind, se2=se2, Sf0=Sf0,Sf=Sf, EU=EU))
}
###BBB
else if(type=="B"){
S <- RSpectra::svds(grandY,r0)
U_joint_ini <- S$u
D_joint_ini <- diag(S$d,nrow=length(S$d))
V_joint_ini <- S$v
# note: segments of V_joint_ini corresponding to each data set may not be orthogonal as desired
U_joint_ini <- U_joint_ini%*%D_joint_ini
B0 <- matrix(0,q,r0)
if (sparsity ==1){
for (i in 1:r0){
# tic
#Attention: lasso fcn alwayas center X,Y, and coefficients are calcualted based on centered X and Y. The function will return a separate column for intercept; if we center data outside the function, the intercept will be nearly 0.
fit <- glmnet::glmnet(X, U_joint_ini[,i], alpha = 1, lambda.min.ratio = 0, standardize = TRUE)
BIC_score <- n*stats::deviance(fit)+log(n)*fit$df
ind <- which(BIC_score == min(BIC_score))
B0[,i] <- fit$beta[,ind]
#toc
}
}else{
# if sparsity = 0, no B-sparsity
B0 <- solve(crossprod(X,X), t(X))%*%U_joint_ini
}
Sf0 <- diag(apply((U_joint_ini-X%*%B0)^2,2,stats::sd),nrow=length(apply((U_joint_ini-X%*%B0)^2,2,stats::sd)))
V_ind <- list()
B <- list()
for(k in 1:K){
B[[k]] <- matrix(0,q,r[k])
}
Sf <- list()
se2 <- rep(0,K)
loc3 <- 1
loc4 <- sum(p[1:1])
Ycurrent <- Y[[1]]-tcrossprod(U_joint_ini,V_joint_ini[loc3:loc4,])
S2 <- RSpectra::svds(Ycurrent,r[1])
U_k_ini <- S2$u
D_k_ini <- diag(S2$d,nrow=length(S2$d))
V_k_ini <- S2$v
U_k_ini <- U_k_ini%*%D_k_ini
V_ind[[1]] <- V_k_ini
if (sparsity==1){
for(i in 1:r[1]){
fit <- glmnet::glmnet(X, U_k_ini[,i], alpha = 1, lambda.min.ratio = 0, standardize = TRUE)
BIC_score <- n*stats::deviance(fit)+log(n)*fit$df
ind <- which(BIC_score == min(BIC_score))
B[[1]][,i] <- fit$beta[,ind]
}
}else{
# if sparsity = 0, no B-sparsity
B[[1]] <- solve(crossprod(X,X),t(X))%*%U_k_ini
}
Sf[[1]] <- diag(apply((U_k_ini - X%*%B[[1]])^2,2,stats::sd))
se2[1] <- norm(Ycurrent-tcrossprod(U_k_ini,V_k_ini), type = "F")^2/(n*p[1])
for(k in 2:K){
loc3 <- sum(p[1:(k-1)])+1
loc4 <- sum(p[1:k])
Ycurrent <- Y[[k]]-tcrossprod(U_joint_ini,V_joint_ini[loc3:loc4,])
S2 <- RSpectra::svds(Ycurrent,r[k])
U_k_ini <- S2$u
D_k_ini <- diag(S2$d,nrow=length(S2$d))
V_k_ini <- S2$v
U_k_ini <- U_k_ini%*%D_k_ini
V_ind[[k]] <- V_k_ini
if (sparsity==1){
for(i in 1:r[k]){
fit <- glmnet::glmnet(X, U_k_ini[,i], alpha = 1, lambda.min.ratio = 0, standardize = TRUE)
BIC_score <- n*stats::deviance(fit)+log(n)*fit$df
ind <- which(BIC_score == min(BIC_score))
B[[k]][,i] <- fit$beta[,ind]
}
}else{
# if sparsity = 0, no B-sparsity
B[[k]] <- solve(crossprod(X,X),t(X))%*%U_k_ini
}
Sf[[k]] <- diag(apply((U_k_ini - X%*%B[[k]])^2,2,stats::sd))
se2[k] <- norm(Ycurrent-tcrossprod(U_k_ini,V_k_ini), type = "F")^2/(n*p[k])
}
#postprocess V_joint_ini such that it follows our identifiavility condition
V_joint <- matrix(0,sum(p),r0)
loc3 <- 1
loc4 <- sum(p[1:1])
V_joint_k <- GramSchmidt(as.matrix(V_joint_ini[loc3:loc4,]),V_ind[[1]])$Q1
V_joint[loc3:loc4,] <- V_joint_k*(1/sqrt(K))
for (k in 2:K){
loc3 <- sum(p[1:(k-1)])+1
loc4 <- sum(p[1:k])
V_joint_k <- GramSchmidt(as.matrix(V_joint_ini[loc3:loc4,]),V_ind[[k]])$Q1
V_joint[loc3:loc4,] <- V_joint_k*(1/sqrt(K))
}
grandV <- NULL
grandV <- pracma::blkdiag(V_ind[[1]])
for(k in 2:K){
grandV <- pracma::blkdiag(grandV,V_ind[[k]])
}
grandV <- cbind(V_joint, grandV) # sum(p)*(r0+sum(r))
#disp('Initial set done!')
loglik <- loglikelihood(X,Y,B0,B,V_joint,V_ind,se2,Sf0,Sf)
recloglik <- loglik
maxloglik <- loglik
niter <- 0
diff <- Inf
diff_max <- Inf
tol_thres <- -10
recdiff <- NULL
recdiff_grandV <- NULL
recdiff_V_joint <- NULL
recdiff_V1 <- NULL
recdiff_V2 <- NULL
while(niter<=max_niter && abs(diff)>convg_thres)#&& diff_max>tol_thres
{
cat(sprintf('This is the %.d th iterations, the diff= %.4g \n',niter,diff))
niter <- niter+1
#record last iter
#loglik_old <- loglik
grandV_old <- grandV
V1_old <- V_ind[[1]]
V2_old <- V_ind[[2]]
#E step
grandse2 <- NULL #1*sum(p)
grandSf <- t(diag(Sf0)) # 1*(r0+sum(r))
# grandV_temp = NULL #combination of loadings: first few long columns are joint loadings, subsequent diagonal blocks are individual loadings
grandB_temp <- NULL #concatenate B
Delta_temp <- rep(1,r0)*sum(se2^(-1))/K
for(k in 1:K){
grandse2 <- c(grandse2, (rep(1,p[k])*se2[k]))
grandSf <- c(grandSf,t(diag(Sf[[k]])))
# grandV_temp <- pracma::blkdiag(grandV_temp, V_ind[k])
grandB_temp <- cbind(grandB_temp, B[[k]])
Delta_temp <- c(Delta_temp, rep(1,r[k])*(1/se2[k]))
}
grandse2_inv <- 1/grandse2 #1*sum(p)
grandSf_inv <- 1/grandSf #1*(r0+sum(r))
# grandV <- cbind(V_joint, grandV_temp) #sum(p)*(r0+sum(r))
grandB <- cbind(B0, grandB_temp)#q*(r0+sum(r))
Delta1 <- Delta_temp #bsxfun(@times, grandV', grandse2_inv)*grandV; % 1*(r0+sum(r))
Delta2 <- grandSf_inv+Delta1 #1*(r0+sum(r))
temp <- tcrossprod(pracma::arrayfun("*",grandV, (1/Delta2)),grandV) #sum(p)*sum(p)
SigmaY_inv <- diag(grandse2_inv)-pracma::arrayfun("*",pracma::arrayfun("*",temp, grandse2_inv),t(grandse2_inv)) #sum(p)*sum(p), not diagonal because of common structure, diff from SupSVD
VSigmaYinvV <- Delta1 - Delta1*(1/Delta2)*Delta1 #1*(r0+sum(r))
EU <- X%*%pracma::arrayfun("*",grandB, (rep(1,sum(r)+r0)-VSigmaYinvV*grandSf))+grandY%*%SigmaY_inv%*%pracma::arrayfun("*",grandV,grandSf) # conditional mean
covU <- grandSf-grandSf*VSigmaYinvV*grandSf #1*(r0+sum(r)), conditional variance (turns out to be a diagonal matrix)
# M step
# V and se2
loc1 <- r0+1
loc2 <- loc1+r[1]-1
loc3 <- 1
loc4 <- sum(p[1:1])
Ycurrent <- Y[[1]]
EUcurrent <- cbind(EU[,1:r0],EU[,loc1:loc2])
EUcurrent_star <- cbind((1/sqrt(K))*EU[,1:r0],EU[,loc1:loc2])
#V
S3 <- RSpectra::svds(crossprod(Ycurrent,EUcurrent_star),(r0+r[1]))
tempL <- S3$u
tempR <- S3$v
Vcurrent_star <- tcrossprod(tempL,tempR) # should have orthonormal columns
V_joint[loc3:loc4,] <- Vcurrent_star[,1:r0,drop=F]*(1/sqrt(K))
V_ind[[1]] <- Vcurrent_star[,(r0+1):(r0+r[1])]
Vcurrent <- cbind(V_joint[loc3:loc4,],V_ind[[1]])
# clear EUcurrent_star Vcurrent_star
# se2 (directly from SupSVD formula)
covUcurrent <- covU[c(1:r0,loc1:loc2)] #1*(r0+r[k])
temp1 <- sum(diag(tcrossprod(Ycurrent,Ycurrent)))
temp2 <- 2*sum(diag(tcrossprod(EUcurrent,Vcurrent)%*%t(Ycurrent)))
temp3 <- n*sum(diag((crossprod(Vcurrent,Vcurrent))%*%diag(covUcurrent)))
temp4 <- sum(diag((crossprod(EUcurrent,EUcurrent))%*%(crossprod(Vcurrent,Vcurrent))))
se2[1] <- (temp1 - temp2+temp3+temp4)/(n*p[1])
for (k in 2:K){
loc1 <- r0+sum(r[1:(k-1)])+1
loc2 <- loc1+r[k]-1
loc3 <- sum(p[1:(k-1)])+1
loc4 <- sum(p[1:k])
Ycurrent <- Y[[k]]
EUcurrent <- cbind(EU[,1:r0],EU[,loc1:loc2])
EUcurrent_star <- cbind((1/sqrt(K))*EU[,1:r0],EU[,loc1:loc2])
#V
S3 <- RSpectra::svds(crossprod(Ycurrent,EUcurrent_star),(r0+r[k]))
tempL <- S3$u
tempR <- S3$v
Vcurrent_star <- tcrossprod(tempL,tempR) # should have orthonormal columns
V_joint[loc3:loc4,] <- Vcurrent_star[,1:r0,drop=F]*(1/sqrt(K))
V_ind[[k]] <- Vcurrent_star[,(r0+1):(r0+r[k])]
Vcurrent <- cbind(V_joint[loc3:loc4,],V_ind[[k]])
# clear EUcurrent_star Vcurrent_star
# se2 (directly from SupSVD formula)
covUcurrent <- covU[c(1:r0,loc1:loc2)] #1*(r0+r[k])
temp1 <- sum(diag(tcrossprod(Ycurrent,Ycurrent)))
temp2 <- 2*sum(diag(tcrossprod(EUcurrent,Vcurrent)%*%t(Ycurrent)))
temp3 <- n*sum(diag((crossprod(Vcurrent,Vcurrent))%*%diag(covUcurrent)))
temp4 <- sum(diag((crossprod(EUcurrent,EUcurrent))%*%(crossprod(Vcurrent,Vcurrent))))
se2[k] <- (temp1 - temp2+temp3+temp4)/(n*p[k])
}
# B and Sf
EUcurrent <- EU[,1:r0]
if(sparsity ==1){
cat("Start estimating B0...")
for(i in 1:r0){
fit <- glmnet::glmnet(X,EUcurrent[,i],lambda.min.ratio = 0, standardize = TRUE,alpha=1)
BIC_score <- n*stats::deviance(fit)+log(n)*fit$df
ind <- which(BIC_score == min(BIC_score))
B0[,i] <- fit$beta[,ind]
}
cat("Finish estimating B0!")
}else{
# if sparsity = 0, no B-sparsity
B0 <- solve(crossprod(X,X),t(X))%*%EUcurrent
}
covUcurrent <- covU[1:r0]
temp1 <- n*diag(covUcurrent,nrow=length(covUcurrent))
temp2 <- crossprod(EUcurrent,EUcurrent)
temp3 <- crossprod(B0,crossprod(X,X))%*%B0
temp4 <- t(B0)%*%crossprod(X,EUcurrent)
temp5 <- crossprod(EUcurrent,X)%*%B0
Sf0 <- diag(diag(temp1+temp2+temp3-temp4-temp5)/n,nrow=length(diag(temp1+temp2+temp3-temp4-temp5)/n))
loc1 <- r0+1
loc2 <- loc1+r[1]-1
EUcurrent <- EU[,loc1:loc2]
if(sparsity == 1){
#cat("start estimating B", pracma::num2str(k),"...")
for(i in 1:r[1]){
fit <- glmnet::glmnet(X,EUcurrent[,i],lambda.min.ratio = 0, standardize = TRUE,alpha=1)
BIC_score <- n*stats::deviance(fit)+log(n)*fit$df
ind <- which(BIC_score == min(BIC_score))
B[[1]][,i] <- fit$beta[,ind]
}
}else{ # no B-sparsity
B[[1]] <- solve(crossprod(X,X),t(X))%*%EUcurrent
}
covUcurrent <- covU[loc1:loc2]
temp1 <- n*diag(covUcurrent)
temp2 <- crossprod(EUcurrent,EUcurrent)
temp3 <- crossprod(B[[1]],crossprod(X,X))%*%B[[1]]
temp4 <- t(B[[1]])%*%crossprod(X,EUcurrent)
temp5 <- crossprod(EUcurrent,X)%*%B[[1]]
Sf[[1]] <- diag(diag(temp1+temp2+temp3-temp4-temp5)/n)
for(k in 2:K){
loc1 <- r0+sum(r[1:(k-1)])+1
loc2 <- loc1+r[k]-1
EUcurrent <- EU[,loc1:loc2]
if(sparsity == 1){
#cat("start estimating B", pracma::num2str(k),"...")
for(i in 1:r[k]){
fit <- glmnet::glmnet(X,EUcurrent[,i],lambda.min.ratio = 0, standardize = TRUE,alpha=1)
BIC_score <- n*stats::deviance(fit)+log(n)*fit$df
ind <- which(BIC_score == min(BIC_score))
B[[k]][,i] <- fit$beta[,ind]
}
}else{ # no B-sparsity
B[[k]] <- solve(crossprod(X,X),t(X))%*%EUcurrent
}
covUcurrent <- covU[loc1:loc2]
temp1 <- n*diag(covUcurrent)
temp2 <- crossprod(EUcurrent,EUcurrent)
temp3 <- crossprod(B[[k]],crossprod(X,X))%*%B[[k]]
temp4 <- t(B[[k]])%*%crossprod(X,EUcurrent)
temp5 <- crossprod(EUcurrent,X)%*%B[[k]]
Sf[[k]] <- diag(diag(temp1+temp2+temp3-temp4-temp5)/n)
}
#reorder columns of B,V,and rows/columns of Sf
tempA <- sort(diag(Sf0), decreasing = T, index.return = T)
temp <- tempA$x
I <- tempA$ix
Sf0 <- diag(temp)
B0 <- B0[,I]
V_joint <- V_joint[,I,drop=F]
for(k in 1:K){
tempB <- sort(diag(Sf[[k]]), decreasing = T, index.return = T)
temp <- tempB$x
I <- tempB$ix
Sf[[k]] <- diag(temp)
B[[k]]<-B[[k]][,I]
V_ind[[k]] <- V_ind[[k]][,I]
}
#calc grandV
grandV <- NULL
grandV<-pracma::blkdiag(V_ind[[1]])
for(k in 2:K){
grandV<-pracma::blkdiag(grandV,V_ind[[k]])
}
grandV <- cbind(V_joint,grandV) #sum(p)*(r0+sum(r))
#stopping rule
diff <- PrinAngle(grandV,grandV_old)
recdiff <- cbind(recdiff,diff)
#diff
#draw loglik trend
loglik <- loglikelihood(X,Y,B0,B,V_joint,V_ind,se2,Sf0,Sf)
diff_max<- loglik-maxloglik #insurance, avoid likelihood decrease
maxloglik <- max(maxloglik,loglik)
}
#calc EU
grandse2 <- NULL #1*sum(p)
grandSf <- t(diag(Sf0)) #1*(r0+sum(r))
grandV_temp <- NULL #combination of loadings: first few long columns are joint loading, subsequent diagonal blocks are individual loadings
grandB_temp <- NULL #concatenate B
Delta_temp <- rep(1,r0)*sum(se2^(-1))/K
grandse2 <- c(grandse2,(rep(1,p[1])*se2[1]))
grandSf <- c(grandSf,t(diag(Sf[[1]])))
grandV_temp <- pracma::blkdiag(V_ind[[1]])
grandB_temp <- cbind(grandB_temp,B[[1]])
Delta_temp <- c(Delta_temp,rep(1,r[1])*(1/se2[1]))
for(k in 2:K){
grandse2 <- c(grandse2,(rep(1,p[k])*se2[k]))
grandSf <- c(grandSf,t(diag(Sf[[k]])))
grandV_temp <- pracma::blkdiag(grandV_temp,V_ind[[k]])
grandB_temp <- cbind(grandB_temp,B[[k]])
Delta_temp <- c(Delta_temp,rep(1,r[k])*(1/se2[k]))
}
grandse2_inv <- 1/grandse2 #1*sum(p)
grandSf_inv <- 1/grandSf #1*(r0+sum(r))
grandV <- cbind(V_joint, grandV_temp)#sum(p)*(r0+sum(r))
grandB <- cbind(B0,grandB_temp) #q*(r0+sum(r))
Delta1 <- Delta_temp #bsxfun("*", grandV',grandse2_inv)%*%grandV #1*(r0+sum(r))
Delta2 <- grandSf_inv+Delta1 #1*(r0+sum(r))
temp <- tcrossprod(pracma::arrayfun("*",grandV, (1/Delta2)),grandV) #sum(p)*sum(p)
SigmaY_inv <- diag(grandse2_inv)-pracma::arrayfun("*",pracma::arrayfun("*",temp,grandse2_inv),t(grandse2_inv)) #sum(p)*sum(p), not diagonal because of common structure, diff from SupSVD
VSigmaYinvV <- Delta1-Delta1*(1/Delta2)*Delta1 #1*(r0+sum(r))
EU <- X%*%pracma::arrayfun("*",grandB,(rep(1,sum(r)+r0)-VSigmaYinvV*grandSf))+grandY%*%SigmaY_inv%*%pracma::arrayfun("*",grandV,grandSf) # conditional mean
#Print convergence information
if(niter<max_niter){
cat("SIPCA_B converges after",pracma::num2str(niter),"iterations.")
}else{
cat("SIPCA_B NOT converge after",pracma::num2str(max_niter),"iterations!!! Final change in angle: ",pracma::num2str(diff))
}
return(list(B0=B0, B=B, V_joint=V_joint,V_ind=V_ind,se2=se2,Sf0=Sf0,Sf=Sf,EU=EU))
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.