Nothing
##' Sparse orthogonal factor regression
##'
##' Compute solution paths of sparse orthogonal factor regression
##'
##' The model parameters can be specified through argument \code{modstr}.
##' The available elements include
##' \itemize{
##' \item{mu}: parameter in the augmented Lagrangian function.
##' \item{mugamma}: increament of mu along iterations to speed up
##' computation.
##' \item{WA}: weight matrix for A.
##' \item{WB}: weight matrix for B.
##' \item{Wd}: weight matrix for d.
##' \item{wgamma}: power parameter in constructing adaptive weights.
##' }
##' The model fitting can be controled through argument \code{control}.
##' The avilable elements include
##' \itemize{
##' \item{nlam}: number of lambda triplets to be used.
##' \item{lam.min.factor}: set the smallest lambda triplets as a fraction of the
##' estimation lambda.max triplets.
##' \item{lam.max.factor}: set the largest lambda triplets as a multiple of the
##' estimation lambda.max triplets.
##' \item{lam.AB.factor}: set the relative penalty level between A/B and D.
##' \item{penA,penB,penD}: if TRUE, penalty is applied.
##' \item{lamA}: sequence of tuning parameters for A.
##' \item{lamB}: sequence of tuning parameters for B.
##' \item{lamD}: sequence of tuning parameters for d.
##' \item{methodA}: penalty for penalizing A.
##' \item{methodB}: penalty for penalizing B.
##' \item{epsilon}: convergence tolerance.
##' \item{maxit}: maximum number of iterations.
##' \item{innerEpsilon}: convergence tolerance for inner subroutines.
##' \item{innerMaxit}: maximum number of iterations for inner subroutines.
##' \item{sv.tol}: tolerance for singular values.
##' }
##'
##' @param Y response matrix
##' @param X covariate matrix
##' @param nrank an integer specifying the desired rank/number of factors
##' @param su a scaling vector for U such that \eqn{U^TU = diag(s_u)}.
##' @param sv a scaling vector for V such that \eqn{V^TV = diag(s_v)}.
##' @param ic.type select tuning method; the default is GIC
##' @param modstr a list of internal model parameters controlling the model
##' fitting
##' @param control a list of internal computation parameters controlling
##' optimization
##' @param screening If TRUE, marginal screening via lasso is performed before
##' sofar fitting.
##'
##' @return
##' A \code{sofar} object containing
##' \item{call}{original function call}
##' \item{Y}{input response matrix}
##' \item{X}{input predictor matrix}
##' \item{Upath}{solution path of U}
##' \item{Dpath}{solution path of D}
##' \item{Vpath}{solution path of D}
##' \item{Rpath}{path of estimated rank}
##' \item{icpath}{path of information criteria}
##' \item{lam.id}{ids of selected lambda for GIC, BIC, AIC and GCV}
##' \item{p.index}{ids of predictors which passed screening}
##' \item{q.index}{ids of responses which passed screening}
##' \item{lamA}{tuning sequence for A}
##' \item{lamB}{tuning sequence for B}
##' \item{lamD}{tuning sequence for D}
##' \item{U}{estimated left singular matrix that is orthogonal (factor weights)}
##' \item{V}{estimated right singular matrix that is orthogonal (factor loadings)}
##' \item{D}{estimated singular values}
##' \item{rank}{estimated rank}
##'
##' @references
##'
##' Uematsu, Y., Fan, Y., Chen, K., Lv, J., & Lin, W. (2019). SOFAR: large-scale
##' association network learning. \emph{IEEE Transactions on Information
##' Theory}, 65(8), 4924--4939.
##'
##' @examples
##' \dontrun{
##' library(rrpack)
##' ## Simulate data from a sparse factor regression model
##' p <- 100; q <- 50; n <- 100; nrank <- 3
##' mydata <- rrr.sim1(n, p, q, nrank, s2n = 1,
##' sigma = NULL, rho_X = 0.5, rho_E = 0.3)
##' Y <- mydata$Y
##' X <- mydata$X
##'
##' fit1 <- sofar(Y, X, ic.type = "GIC", nrank = nrank + 2,
##' control = list(methodA = "adlasso", methodB = "adlasso"))
##' summary(fit1)
##' plot(fit1)
##'
##' fit1$U
##' crossprod(fit1$U) #check orthogonality
##' fit1$V
##' crossprod(fit1$V) #check orthogonality
##' }
##'
##' @export
sofar <- function(Y,
X,
nrank = 1,
su = NULL, sv = NULL,
ic.type = c("GIC", "BIC", "AIC", "GCV"),
modstr = list(),
control = list(),
screening = FALSE)
{
Call <- match.call()
p <- ncol(X)
q <- ncol(Y)
n <- nrow(Y)
if(is.null(su)) su <- rep(1,nrank)
# Su_sqrt <- diag(nrow=nrank, ncol=nrank)
# #Su_sqrt_inv <- diag(nrow=nrank, ncol=nrank)
# }else{
# Su_sqrt <- diag(sqrt(su),nrow=nrank, ncol=nrank)
# #Su_sqrt_inv <- diag(1/sqrt(su),nrow=nrank, ncol=nrank)
# }
if(is.null(sv)) sv <- rep(1,nrank)
# Sv_sqrt <- diag(nrow=nrank, ncol=nrank)
# #Sv_sqrt_inv <- diag(nrow=nrank, ncol=nrank)
# }else{
# Sv_sqrt <- diag(sqrt(sv),nrow=nrank, ncol=nrank)
# #Sv_sqrt_inv <- diag(1/sqrt(sv),nrow=nrank, ncol=nrank)
# }
## model parameters
modstr <- do.call("sofar.modstr", modstr)
mu <- modstr$mu
mugamma <- modstr$mugamma
WA <- modstr$WA
WB <- modstr$WB
Wd <- modstr$Wd
wgamma <- modstr$wgamma
## control
control <- do.call("sofar.control", control)
nlam <- control$nlam
lam.min.factor <- control$lam.min.factor
lam.max.factor <- control$lam.max.factor
lam.AB.factor <- control$lam.AB.factor
penA <- control$penA
penB <- control$penB
penD <- control$penD
lamA <- control$lamA
lamB <- control$lamB
lamD <- control$lamD
methodA <- control$methodA
methodB <- control$methodB
epsilon <- control$epsilon
maxit <- control$maxit
innerEpsilon <- control$innerEpsilon
innerMaxit <- control$innerMaxit
tol <- control$sv.tol
## Initial screening from lasso
if(screening == TRUE){
ini.screen <- sofar.init(Y, X, nrank)
p.index <- ini.screen$p.index
q.index <- ini.screen$q.index
} else{
p.index <- 1:p
q.index <- 1:q
}
## U <- init$coef.svd$U
## V <- init$coef.svd$V
## D <- init$coef.svd$D
## Reduce the problem and compute lambda sequence
Y.reduced <- Y[, q.index]
X.reduced <- X[, p.index]
q.reduced <- length(q.index)
p.reduced <- length(p.index)
## Construct weights and initial value.
ini <- NULL
if (is.null(WA) || is.null(WB) || is.null(Wd)) {
if (is.null(ini))
ini <-
rrr.fit(Y.reduced, X.reduced, nrank, coefSVD = TRUE)
WA <- switch(
methodA,
lasso = matrix(1., p.reduced, nrank),
adlasso = (
ini$coefSVD$u %*%
diag(ini$coefSVD$d / sqrt(sv),
nrow = length(ini$coefSVD$d)) + 1e-8
) ^ (-wgamma),
glasso = rep(1., p.reduced),
adglasso = sqrt(rowSums((
ini$coefSVD$u %*%
diag(ini$coefSVD$d/sqrt(sv),
nrow = length(ini$coefSVD$d))
) ^ 2)) ^ (-wgamma)
)
WB <- switch(
methodB,
lasso = matrix(1., q.reduced, nrank),
adlasso = (ini$coefSVD$v %*%
diag(ini$coefSVD$d/sqrt(su),
nrow = length(ini$coefSVD$d)) + 1e-8
) ^ (-wgamma),
glasso = rep(1., q.reduced),
adglasso = sqrt(rowSums((
ini$coefSVD$v %*%
diag(ini$coefSVD$d/sqrt(su),
nrow = length(ini$coefSVD$d))
) ^ 2)) ^ (-wgamma)
)
if (substr(methodA, 1, 2) == "ad" ||
substr(methodB, 1, 2) == "ad") {
if (is.null(ini))
ini <-
rrr.fit(Y.reduced, X.reduced, nrank, coefSVD = TRUE)
Wd <- (ini$coefSVD$d / sqrt(su) / sqrt(sv) + 1e-8) ^ (- wgamma)
} else {
Wd <- rep(1., nrank)
}
modstr$WA <- WA
modstr$WB <- WB
modstr$Wd <- Wd
}
## if (is.null(init$U) || is.null(init$V) || is.null(init$D)) {
U <- t(sqrt(su)*t(ini$coefSVD$u))
V <- t(sqrt(sv)*t(ini$coefSVD$v))
D <- ini$coefSVD$d/sqrt(su)/sqrt(sv)
init <-
list(
U = U,
V = V,
D = D
)
## } else {
## U <- init$U
## V <- init$V
## D <- init$D
## }
## Compute lambda sequence
if (is.null(lamA) || is.null(lamB) || is.null(lamD)) {
lammax <- sofar.lammax(
Y.reduced,
X.reduced,
U = U,
V = V,
D = diag(D, nrow = nrank),
nrank = nrank,
su = su,
sv = sv,
mu = mu,
mugamma = mugamma,
conv = 1e-4,
maxit = maxit,
methodA = methodA,
methodB = methodB,
WA = WA,
WB = WB,
Wd = Wd,
wgamma = 2
)
lamA.max <- lammax$lamA.max
lamB.max <- lammax$lamB.max
lamD.max <- lammax$lamD.max
if (penA) {
lamA <- exp(seq(
log(lamA.max * lam.min.factor),
log(lamA.max * lam.max.factor),
length = nlam
))
} else {
lamA <- rep(0, nlam)
}
if (penB) {
lamB <- exp(seq(
log(lamB.max * lam.min.factor * lam.AB.factor),
log(lamB.max * lam.max.factor * lam.AB.factor),
length = nlam
))
} else {
lamB <- rep(0, nlam)
}
if (penD) {
lamD <- exp(seq(
log(lamD.max * lam.min.factor),
log(lamD.max * lam.max.factor),
length = nlam
))
} else {
lamD <- rep(0, nlam)
}
} else {
nlamA <- length(lamA)
nlamB <- length(lamB)
nlamD <- length(lamD)
stopifnot(nlamA == nlamB & nlamB == nlamD)
}
XX.reduced <- crossprod(X.reduced)
eigenXX.reduced <- eigen(XX.reduced)$values[1]
XY.reduced <- crossprod(X.reduced, Y.reduced)
Yvec.reduced <- as.vector(Y.reduced)
fit <- sofar.path.reduced(
Y.reduced,
X.reduced,
XX = XX.reduced,
XY = XY.reduced,
Yvec = Yvec.reduced,
eigenXX = eigenXX.reduced,
ic = ic.type,
nrank = nrank,
su = su,
sv = sv,
lamA = lamA,
lamB = lamB,
lamD = lamD,
modstr = modstr,
init = init,
control = control
)
## Results
rank <- fit$rank
if (rank > 0) {
U <- matrix(nrow = p, ncol = rank, 0)
U[p.index,] <- fit$U
V <- matrix(nrow = q, ncol = rank, 0)
V[q.index,] <- fit$V
D <- fit$D
} else{
U <- matrix(nrow = p, ncol = 1, 0)
V <- matrix(nrow = q, ncol = 1, 0)
D <- fit$D
}
ICid <- switch(
ic.type,
GIC = fit$lam.id[1],
BIC = fit$lam.id[2],
AIC = fit$lam.id[3],
GCV = fit$lam.id[4]
)
if(ICid==1) warning("Selected smallest lambda on the sequence. Decrease lambda.min.factor",call. = FALSE)
if(ICid==control$nlam) warning("Selected largest lambda on the sequence. Increase lambda.max.factor",call. = FALSE)
out <- list(
call = Call,
Y = Y,
X = X,
Upath = fit$Upath,
Vpath = fit$Vpath,
Dpath = fit$Dpath,
Rpath = fit$Rpath,
icpath = fit$ICpath,
lam.id = fit$lam.id,
p.index = p.index,
q.index = q.index,
lamA = lamA,
lamB = lamB,
lamD = lamD,
U = U,
V = V,
D = D,
rank = rank
)
class(out) <- "sofar"
out
}
##' Sparse orthognal factor regression tuned by cross validation
##'
##' Sparse orthognal factor regression tuned by cross validation
##'
##' The model parameters can be specified through argument \code{modstr}.
##' The available elements include
##' \itemize{
##' \item{mu}: parameter in the augmented Lagrangian function.
##' \item{mugamma}: increament of mu along iterations to speed up
##' computation.
##' \item{WA}: weight matrix for A.
##' \item{WB}: weight matrix for B.
##' \item{Wd}: weight matrix for d.
##' \item{wgamma}: power parameter in constructing adaptive weights.
##' }
##' The model fitting can be controled through argument \code{control}.
##' The avilable elements include
##' \itemize{
##' \item{nlam}: number of lambda triplets to be used.
##' \item{lam.min.factor}: set the smallest lambda triplets as a fraction of
##' the estimation lambda.max triplets.
##' \item{lam.max.factor}: set the largest lambda triplets as a multiple of
##' the estimation lambda.max triplets.
##' \item{lam.AB.factor}: set the relative penalty level between A/B and D.
##' \item{penA,penB,penD}: if TRUE, penalty is applied.
##' \item{lamA}: sequence of tuning parameters for A.
##' \item{lamB}: sequence of tuning parameters for B.
##' \item{lamD}: sequence of tuning parameters for d.
##' \item{methodA}: penalty for penalizing A.
##' \item{methodB}: penalty for penalizing B.
##' \item{epsilon}: convergence tolerance.
##' \item{maxit}: maximum number of iterations.
##' \item{innerEpsilon}: convergence tolerance for inner subroutines.
##' \item{innerMaxit}: maximum number of iterations for inner subroutines.
##' \item{sv.tol}: tolerance for singular values.
##' }
##'
##' @param Y response matrix
##' @param X covariate matrix
##' @param nrank an integer specifying the desired rank/number of factors
##' @param su a scaling vector for U such that \eqn{U^{T}U = diag(s_{u})}
##' @param sv a scaling vector for V such that \eqn{V^{T}V = diag(s_{v})}
##' @param nfold number of fold; used for cv.sofar
##' @param norder observation orders to constrct data folds; used for cv.sofar
##' @param modstr a list of internal model parameters controlling the model
##' fitting
##' @param control a list of internal computation parameters controlling
##' optimization
##' @param screening If TRUE, marginal screening via lasso is performed before
##' sofar fitting.
##'
##' @export
cv.sofar <- function(Y,
X,
nrank = 1,
su = NULL,
sv = NULL,
nfold = 5,
norder = NULL,
modstr = list(),
control = list(),
screening = FALSE)
{
Call <- match.call()
p <- ncol(X)
q <- ncol(Y)
n <- nrow(Y)
if(is.null(su)) su <- rep(1,nrank)
# Su_sqrt <- diag(nrow=nrank, ncol=nrank)
# #Su_sqrt_inv <- diag(nrow=nrank, ncol=nrank)
# }else{
# Su_sqrt <- diag(sqrt(su),nrow=nrank, ncol=nrank)
# #Su_sqrt_inv <- diag(1/sqrt(su),nrow=nrank, ncol=nrank)
# }
if(is.null(sv)) sv <- rep(1,nrank)
# Sv_sqrt <- diag(nrow=nrank, ncol=nrank)
# #Sv_sqrt_inv <- diag(nrow=nrank, ncol=nrank)
# }else{
# Sv_sqrt <- diag(sqrt(sv),nrow=nrank, ncol=nrank)
# #Sv_sqrt_inv <- diag(1/sqrt(sv),nrow=nrank, ncol=nrank)
# }
## model parameters
modstr <- do.call("sofar.modstr", modstr)
mu <- modstr$mu
mugamma <- modstr$mugamma
WA <- modstr$WA
WB <- modstr$WB
Wd <- modstr$Wd
wgamma <- modstr$wgamma
## control
control <- do.call("sofar.control", control)
nlam <- control$nlam
lam.min.factor <- control$lam.min.factor
lam.max.factor <- control$lam.max.factor
lam.AB.factor <- control$lam.AB.factor
penA <- control$penA
penB <- control$penB
penD <- control$penD
lamA <- control$lamA
lamB <- control$lamB
lamD <- control$lamD
methodA <- control$methodA
methodB <- control$methodB
epsilon <- control$epsilon
maxit <- control$maxit
innerEpsilon <- control$innerEpsilon
innerMaxit <- control$innerMaxit
tol <- control$sv.tol
## initial screening from lasso
if(screening == TRUE){
ini.screen <- sofar.init(Y, X, nrank)
p.index <- ini.screen$p.index
q.index <- ini.screen$q.index
} else{
p.index <- 1:p
q.index <- 1:q
}
Y.reduced <- Y[, q.index]
X.reduced <- X[, p.index]
q.reduced <- length(q.index)
p.reduced <- length(p.index)
## Construct weights and initial value.
ini <- NULL
if (is.null(WA) || is.null(WB) || is.null(Wd)) {
if (is.null(ini))
ini <-
rrr.fit(Y.reduced, X.reduced, nrank, coefSVD = TRUE)
WA <- switch(
methodA,
lasso = matrix(1., p.reduced, nrank),
adlasso = (ini$coefSVD$u %*%
diag(ini$coefSVD$d/sqrt(sv),
nrow = length(ini$coefSVD$d))
) ^ (-wgamma),
glasso = rep(1., p.reduced),
adglasso = sqrt(rowSums((
ini$coefSVD$u %*%
diag(ini$coefSVD$d/sqrt(sv),
nrow = length(ini$coefSVD$d))
) ^ 2)) ^ (-wgamma)
)
WB <- switch(
methodB,
lasso = matrix(1., q.reduced, nrank),
adlasso = (ini$coefSVD$v %*%
diag(ini$coefSVD$d/sqrt(su),
nrow = length(ini$coefSVD$d))
) ^ (-wgamma),
glasso = rep(1., q.reduced),
adglasso = sqrt(rowSums((
ini$coefSVD$v %*%
diag(ini$coefSVD$d/sqrt(su),
nrow = length(ini$coefSVD$d))
) ^ 2)) ^ (-wgamma)
)
if (substr(methodA, 1, 2) == "ad" ||
substr(methodB, 1, 2) == "ad") {
if (is.null(ini))
ini <-
rrr.fit(Y.reduced, X.reduced, nrank, coefSVD = TRUE)
Wd <- (ini$coefSVD$d/sqrt(su)/sqrt(sv)) ^ (-wgamma)
} else {
Wd <- rep(1., nrank)
}
modstr$WA <- WA
modstr$WB <- WB
modstr$Wd <- Wd
}
## if (is.null(init$U) || is.null(init$V) || is.null(init$D)) {
U <- t(sqrt(su)*t(ini$coefSVD$u))
V <- t(sqrt(sv)*t(ini$coefSVD$v))
D <- ini$coefSVD$d/sqrt(su)/sqrt(sv)
init <-
list(
U = U,
V = V,
D = D
)
## } else {
## U <- init$U
## V <- init$V
## D <- init$D
## }
## Compute lambda sequence
if (is.null(lamA) || is.null(lamB) || is.null(lamD)) {
lammax <- sofar.lammax(
Y.reduced,
X.reduced,
U = U,
V = V,
D = diag(D, nrow = nrank),
nrank = nrank,
su = su,
sv = sv,
mu = mu,
mugamma = mugamma,
conv = 1e-4,
maxit = maxit,
methodA = methodA,
methodB = methodB,
WA = WA,
WB = WB,
Wd = Wd,
wgamma = 2
)
lamA.max <- lammax$lamA.max
lamB.max <- lammax$lamB.max
lamD.max <- lammax$lamD.max
if (penA) {
lamA <- exp(seq(
log(lamA.max * lam.min.factor),
log(lamA.max * lam.max.factor),
length = nlam
))
} else {
lamA <- rep(0, nlam)
}
if (penB) {
lamB <- exp(seq(
log(lamB.max * lam.min.factor * lam.AB.factor),
log(lamB.max * lam.max.factor * lam.AB.factor),
length = nlam
))
} else {
lamB <- rep(0, nlam)
}
if (penD) {
lamD <- exp(seq(
log(lamD.max * lam.min.factor),
log(lamD.max * lam.max.factor),
length = nlam
))
} else {
lamD <- rep(0, nlam)
}
} else {
nlamA <- length(lamA)
nlamB <- length(lamB)
nlamD <- length(lamD)
stopifnot(nlamA == nlamB & nlamB == nlamD)
}
XX.reduced <- crossprod(X.reduced)
eigenXX.reduced <- eigen(XX.reduced)$values[1]
XY.reduced <- crossprod(X.reduced, Y.reduced)
Yvec.reduced <- as.vector(Y.reduced)
fit <- sofar.cv(
Y.reduced,
X.reduced,
XX = XX.reduced,
XY = XY.reduced,
Yvec = Yvec.reduced,
eigenXX = eigenXX.reduced,
nrank = nrank,
nfold = nfold,
su = su,
sv = sv,
norder = norder,
lamA = lamA,
lamB = lamB,
lamD = lamD,
modstr = modstr,
init = init,
control = control
)
## Results
rank <- fit$rank
if (rank > 0) {
U <- matrix(nrow = p, ncol = rank, 0)
U[p.index, ] <- fit$U
V <- matrix(nrow = q, ncol = rank, 0)
V[q.index, ] <- fit$V
D <- fit$D
} else {
U <- matrix(nrow = p, ncol = 1, 0)
V <- matrix(nrow = q, ncol = 1, 0)
D <- fit$D
}
if(fit$lam.id==1) warning("Selected smallest lambda on the sequence. Decrease lambda.min.factor",call. = FALSE)
if(fit$lam.id==control$nlam) warning("Selected largest lambda on the sequence. Increase lambda.max.factor", call. = FALSE)
out <- list(
call = Call,
Y = Y,
X = X,
crpath = fit$cr.path,
crerror = fit$cr.error,
norder = fit$norder,
Upath = fit$Upath,
Vpath = fit$Vpath,
Dpath = fit$Dpath,
Rpath = fit$Rpath,
icpath = fit$ICpath,
lam.id = fit$lam.id,
#lammax = lammax,
p.index = p.index,
q.index = q.index,
lamA = lamA,
lamB = lamB,
lamD = lamD,
U = U,
V = V,
D = D,
rank = rank
)
class(out) <- "cv.sofar"
out
}
### internal functions =========================================================
## Internal function for specifying model parameters
##
## a list of internal model parameters controlling the model fitting
##
## @param mu parameter in the augmented Lagrangian function
## @param mugamma increament of mu along iterations to speed up computation
## @param WA weight matrix for A
## @param WB weight matrix for B
## @param Wd weight matrix for d
## @param wgamma power parameter in constructing adaptive weights
##
## @return a list of model parameters.
sofar.modstr <- function(mu = 1,
mugamma = 1.1,
WA = NULL,
WB = NULL,
Wd = NULL,
wgamma = 2) {
list(
mu = mu,
mugamma = mugamma,
WA = WA,
WB = WB,
Wd = Wd,
wgamma = wgamma
)
}
## Internal function for specifying computation parameters
##
## a list of internal computational parameters controlling optimization
## @param nlam number of lambda triplets to be used
## @param lam.min.factor set the smallest lambda triplets as a fraction of the
## estimation lambda.max triplets
## @param lam.max.factor set the largest lambda triplets as a multiple of the
## estimation lambda.max triplets
## @param lam.AB.factor set the relative penalty level between A/B and D
## @param penA,penB,penD if TRUE, penalty is applied
## @param lamA sequence of tuning parameters for A
## @param lamB sequence of tuning parameters for B
## @param lamD sequence of tuning parameters for d
## @param methodA penalty for penalizing A
## @param methodB penalty for penalizing B
## @param epsilon convergence tolerance
## @param maxit maximum number of iterations
## @param innerEpsilon convergence tolerance for inner subroutines
## @param innerMaxit maximum number of iterations for inner subroutines
## @param sv.tol tolerance for singular values
##
## @return a list of computational parameters.
sofar.control <- function(nlam = 50,
lam.min.factor = 1e-4,
lam.max.factor = 10,
lam.AB.factor = 1,
penA = TRUE,
penB = TRUE,
penD = TRUE,
lamA = NULL,
lamB = NULL,
lamD = NULL,
methodA = "adlasso",
methodB = "adlasso",
epsilon = 1e-3,
maxit = 200L,
innerEpsilon = 1e-3,
innerMaxit = 50L,
sv.tol = 1e-02)
{
list(
nlam = nlam,
lam.min.factor = lam.min.factor,
lam.max.factor = lam.max.factor,
lam.AB.factor = lam.AB.factor,
penA = penA,
penB = penB,
penD = penD,
lamA = lamA,
lamB = lamB,
lamD = lamD,
methodA = methodA,
methodB = methodB,
epsilon = epsilon,
maxit = maxit,
innerEpsilon = innerEpsilon,
innerMaxit = innerMaxit,
sv.tol = sv.tol
)
}
### Preparation
### Generate initial values.
##' @importFrom stats coef
##' @importFrom glmnet cv.glmnet
sofar.init <- function(Y, X, nrank = 1)
{
p <- ncol(X)
q <- ncol(Y)
n <- nrow(Y)
## 1. Use glmnet to get initial values.
## 2. Decompose to get initial values
coef.lasso <- matrix(0, nrow = p, ncol = q)
for (i in seq_len(q)) {
fiti <- cv.glmnet(X, Y[, i], intercept = FALSE, standardize = FALSE)
coef.lasso[, i] <- as.vector(coef(fiti))[-1]
}
p.index <- which(apply(coef.lasso, 1, lnorm) != 0)
q.index <- which(apply(coef.lasso, 2, lnorm) != 0)
## Y.reduced <- Y[,q.index]
## X.reduced <- X[,p.index]
## coef.lasso.reduced <- coef.lasso[p.index,q.index]
## if(ini.type == "xc"){
## fit.rrr.reduced <- rrr.fit(Y=Y.reduced,
## X=X.reduced,
## nrank = nrank)
## fit.rrr.reduced <- rrr.fit(Y=X.reduced%*%coef.lasso.reduced,
## X=X.reduced,
## nrank = nrank)
## coef.rrr.reduced <- fit.rrr.reduced$coef
## coef.svd.reduced <- svd(coef.rrr.reduced,
## nu = nrank,
## nv = nrank)
## }
## if(ini.type == "c"){
## coef.svd.reduced <- svd(coef.lasso.reduced,
## nu = nrank,
## nv = nrank)
## coef.rrr.reduced <- coef.svd.reduced$u %*%
## diag(coef.svd.reduced$d[1:nrank],nrow=nrank) %*%
## t(coef.svd.reduced$v)
## }
## coef.svd$u
## coef.svd$d[1:nrank]
## coef.svd$v
list(p.index = p.index,
q.index = q.index)
}
### Solve the Procrustes problem in SoFAR
sofar.procrustes <- function(XY,
XX,
D,
rho2,
U = NULL,
identity = FALSE,
control = list(maxit = 50L, epsilon = 1e-3)) {
control <- do.call("sofar.control", control)
p <- ncol(XX)
converged <- FALSE
if (is.null(U) | identity == TRUE) {
XYsvd <- svd(XY)
U <- tcrossprod(XYsvd$u, XYsvd$v)
## U <- XYsvd$u%*%t(XYsvd$v)
diff <- NULL
niter = NULL
converged <- TRUE
}
if (identity == FALSE) {
epsilon <- control$epsilon
maxit <- control$maxit
niter <- 1
## rho2 <- eigen(XX)$values[1]
diff <- vector()
diff[1] <- 10 * epsilon
## D = diag(D,nrow=length(D))
## ZZ <- (rho2*diag(nrow=p)-XX)
ZZ <- -XX
diag(ZZ) <- rho2 + diag(ZZ)
XYD <- XY %*% D
Dsq <- D ^ 2
while (niter <= maxit & diff[niter] > epsilon) {
U0 <- U
## Z <- ((rho2*diag(nrow=p)-XX)%*%U0%*%D + XY)%*%D
Z <- ZZ %*% U0 %*% Dsq + XYD
Zsvd <- svd(Z, nu = nrow(D), nv = nrow(D))
#U <- Zsvd$u%*%Zsvd$v
U <- tcrossprod(Zsvd$u, Zsvd$v)
niter <- niter + 1
diff[niter] <- sqrt(sum((U - U0) ^ 2))
## diff[niter] <- sqrt(sum((Y-X%*%U%*%D)^2))
}
}
list(
U = U,
diff = diff,
niter = niter,
converged = converged
)
}
###Fit SoFAR with fixed tuning parameters
sofar.fit <- function(Y,
X,
XX = NULL,
XY = NULL,
Yvec = NULL,
eigenXX = NULL,
nrank = 1,
su = NULL,
sv = NULL,
modstr = list(),
init = list(U = NULL, V = NULL, D = NULL),
control = list()) {
n <- nrow(Y)
p <- ncol(X)
q <- ncol(Y)
if(is.null(su)) su <- rep(1,nrank)
#Su_sqrt <- diag(nrow=nrank, ncol=nrank)
#Su_sqrt_inv <- diag(nrow=nrank, ncol=nrank)
# } else{
# Su_sqrt <- diag(sqrt(su),nrow=nrank, ncol=nrank)
# #Su_sqrt_inv <- diag(1/sqrt(su),nrow=nrank, ncol=nrank)
# }
if(is.null(sv)) sv <- rep(1,nrank)
#Sv_sqrt <- diag(nrow=nrank, ncol=nrank)
#Sv_sqrt_inv <- diag(nrow=nrank, ncol=nrank)
# } else{
# Sv_sqrt <- diag(sqrt(sv),nrow=nrank, ncol=nrank)
# #Sv_sqrt_inv <- diag(1/sqrt(sv),nrow=nrank, ncol=nrank)
# }
modstr <- do.call("sofar.modstr", modstr)
##init <- do.call("sofar.0", init)
control <- do.call("sofar.control", control)
## FIXME: xrank may become an input is the same X is used in multiple calls
xrank <- sum(svd(X)$d > control$sv.tol)
## init
U <- init$U
V <- init$V
D <- init$D
## modstr
#nrank <- modstr$nrank;
mu <- modstr$mu
mugamma <- modstr$mugamma
WA <- modstr$WA
WB <- modstr$WB
Wd <- modstr$Wd
wgamma <- modstr$wgamma
## control
lamA <-
control$lamA
lamB <- control$lamB
lamD <- control$lamD
methodA <- control$methodA
methodB <- control$methodB
epsilon <- control$epsilon
maxit <- control$maxit
innerEpsilon <-
control$innerEpsilon
innerMaxit <- control$innerMaxit
tol <- control$sv.tol
if (is.null(U) || is.null(V) || is.null(D)) {
ini <- rrr.fit(Y, X, nrank, coefSVD = TRUE)
U <- t(sqrt(su)*t(ini$coefSVD$u))
V <- t(sqrt(sv)*t(ini$coefSVD$v))
D <- ini$coefSVD$d/sqrt(su)/sqrt(sv)
} else {
ini <- NULL
}
## d <- diag(D,nrow=length(D),ncol=length(D))
## FIXME:
## diag(D,nrow=length(D),ncol=length(D)) need to have nrow specified for
## security diag(0.9) returns a matrix of 0 by 0 instead of a 1 by 1 matrix
## of 0.9
A <- t(t(U) * D) # U%*%D
B <- t(t(V) * D) # V%*%D
GA <- matrix(0., p, nrank)
GB <- matrix(0., q, nrank)
nzid <- 1:nrank
## Adaptive weights
if (is.null(WA) || is.null(WB) || is.null(Wd)) {
if (is.null(ini))
ini <- rrr.fit(Y, X, nrank, coefSVD = TRUE)
WA <- switch(
methodA,
lasso = matrix(1., p, nrank),
adlasso = (ini$coefSVD$u %*%
diag(ini$coefSVD$d/sqrt(sv),
nrow = length(ini$coefSVD$d))
) ^ (-wgamma),
glasso = rep(1., p),
adglasso =
sqrt(rowSums((
ini$coefSVD$u %*%
diag(ini$coefSVD$d/sqrt(sv),
nrow = length(ini$coefSVD$d))
) ^ 2)) ^ (-wgamma)
)
WB <- switch(
methodB,
lasso = matrix(1., q, nrank),
adlasso = (ini$coefSVD$v %*%
diag(ini$coefSVD$d/sqrt(su),
nrow = length(ini$coefSVD$d))
) ^ (-wgamma),
glasso = rep(1., q),
adglasso =
sqrt(rowSums((
ini$coefSVD$v %*%
diag(ini$coefSVD$d/sqrt(su),
nrow = length(ini$coefSVD$d))
) ^2)) ^ (-wgamma)
)
if (substr(methodA, 1, 2) == "ad" ||
substr(methodB, 1, 2) == "ad") {
if (is.null(ini))
ini <- rrr.fit(Y, X, nrank, coefSVD = TRUE)
Wd <- (ini$coefSVD$d/sqrt(su)/sqrt(sv)) ^ (-wgamma)
} else {
Wd <- rep(1., nrank)
}
}
## Compute some products for later use
if (is.null(XX))
XX <- crossprod(X) # t(X)%*%X
if (is.null(eigenXX))
eigenXX <- eigen(XX)$values[1]
if (is.null(XY))
XY <- crossprod(X, Y) # t(X)%*%Y
if (is.null(Yvec))
Yvec <- as.vector(Y)
for (i in 1:maxit) {
## while(diff[iter] > conv & iter < maxit){
## Store initial values before iterations
## update rank
nzid <- which(D != 0)
## If zero solution occurs
if (length(nzid) == 0)
nzid <- 1 ## JY: WHY?
nrank <- length(nzid)
U <- U0 <- as.matrix(U[, nzid, drop = FALSE])
V <- V0 <- as.matrix(V[, nzid, drop = FALSE])
D <- D0 <- D[nzid] # , nzid])
## d <- d0 <- d[nzid]
A <- A0 <- as.matrix(A[, nzid, drop = FALSE])
B <- B0 <- as.matrix(B[, nzid, drop = FALSE])
GA <- GA0 <- as.matrix(GA[, nzid, drop = FALSE])
GB <- GB0 <- as.matrix(GB[, nzid, drop = FALSE])
WA <- switch(
methodA,
lasso = as.matrix(WA[, nzid]),
adlasso = as.matrix(WA[, nzid]),
glasso = as.vector(WA),
adglasso = as.vector(WA)
)
WB <- switch(
methodB,
lasso = as.matrix(WB[, nzid]),
adlasso = as.matrix(WB[, nzid]),
glasso = as.vector(WB),
adglasso = as.vector(WB)
)
Wd <- Wd[nzid]
ee <- diag(nrow = nrank)
su <- su[nzid]
sv <- sv[nzid]
## L2 step -------------------------------------------------------------
## 1. U step
## Note that only XY and XX are needed
XXmu <- XX + mu * diag(nrow = p)
XYu <- XY %*% V + mu * (A - GA)
XXu <- XXmu
Du <- D
## Ustep <- sofar.procrustes(XYu, XXu,
## diag(Du,nrow=length(Du),
## ncol=length(Du)),
## rho2 = eigenXX + mu, U,
## control = list(maxit=innerMaxit,
## epsilon = innerEpsilon))
Ustep <- procrustes_RCpp(
XYu,
XXu,
diag(Du*sqrt(su), nrow = length(Du), ncol = length(Du)),
rho2 = eigenXX + mu,
t((1/sqrt(su))*t(U)),
control = list(maxit = innerMaxit,
epsilon = innerEpsilon)
)
U <- t(sqrt(su)*t(Ustep$U))
## 2. V step
tmp <- crossprod(XY, U) + mu * (B - GB)
tmp <- t(D*sqrt(sv)* t(tmp))
XYvsvd <- svd(tmp)
V <- t(sqrt(sv)*tcrossprod(XYvsvd$v,XYvsvd$u))
## 3. D step
Xd1 <- matrix(0., n * q, nrank)
Xd2 <- matrix(0., p * nrank, nrank)
Xd3 <- matrix(0., q * nrank, nrank)
XU <- X %*% U
for (k in 1:nrank) {
Xd1[, k] <-
kron_RcppArma(V[, k, drop = FALSE], XU[, k, drop = FALSE])
## ek <- as.vector(rep(0., nrank))
## ek[k] <- 1
Xd2[, k] <-
kron_RcppArma(U[, k, drop = FALSE], ee[, k, drop = FALSE]) # ek)
Xd3[, k] <-
kron_RcppArma(V[, k, drop = FALSE], ee[, k, drop = FALSE]) # ek)
}
Xd <-
crossprod(Xd1) + mu * (crossprod(Xd2) + crossprod(Xd3))
Yd <-
crossprod(Xd1, Yvec) + mu * (t(Xd2) %*% as.vector(t(A - GA)) +
t(Xd3) %*% as.vector(t(B - GB)))
if (lamD == 0) {
D <- solve(Xd, Yd)
} else {
## Wdminv <- solve(diag(Wd, nrow=nrank,ncol=nrank))
Wdminv <- 1. / Wd ## a vector
Xdw <- Wdminv * Xd %*% diag(Wdminv, nrow = length(Wdminv))
Ydw <- Wdminv * Yd
D <- Wdminv * lasso_shooting(xtx = Xdw, xty = Ydw, lambda = lamD)
}
## FIXME: should this be a tolerence?
D[D < 0] <- 0
D <- as.vector(D)
## D <- as.vector(d) ## diag(nrow=nrank,ncol=nrank, as.vector(d))
## If some d becomes zero, update U and V
U[, which(D == 0)] <- 0
V[, which(D == 0)] <- 0
## Thresholding step ---------------------------------------------------
##There might be some errors in the memo
##A <- softTH(U%*%D+GA,lamA/mu*WA)
##B <- softTH(V%*%D+GB,lamB/mu*WB)
A <- switch(
methodA,
lasso = softTH(U %*% diag(D, nrow = length(D)) +
GA, lamA / mu * WA, tol = ifelse(lamA / mu * WA <=tol, 0, tol)),
adlasso = softTH(U %*% diag(D, nrow = length(D)) +
GA, lamA / mu * WA, tol = ifelse(lamA / mu * WA <=tol, 0, tol)),
glasso = softrowTH(U %*% diag(D, nrow = length(D)) +
GA, lamA / mu * WA, tol = ifelse(lamA / mu * WA <=tol, 0, tol))$C,
adglasso = softrowTH(U %*% diag(D, nrow = length(D)) +
GA, lamA / mu * WA, tol = ifelse(lamA / mu * WA <=tol, 0, tol))$C
)
B <- switch(
methodB,
lasso = softTH(V %*% diag(D, nrow = length(D)) +
GB, lamB / mu * WB, tol = ifelse(lamB / mu * WB <=tol, 0, tol)),
adlasso = softTH(V %*% diag(D, nrow = length(D)) +
GB, lamB / mu * WB, tol = ifelse(lamB / mu * WB <=tol, 0, tol)),
glasso = softrowTH(V %*% diag(D, nrow = length(D)) +
GB, lamB / mu * WB, tol = ifelse(lamB / mu * WB <=tol, 0, tol))$C,
adglasso = softrowTH(V %*% diag(D, nrow = length(D)) +
GB, lamB / mu * WB, tol = ifelse(lamB / mu * WB <=tol, 0, tol))$C
)
## Dual step
GA <-
(GA + (t(D * t(U)) - A)) / mugamma # (GA + (U %*% D - A)) / mugamma
GB <-
(GB + (t(D * t(V)) - B)) / mugamma # (GB + (V %*% D - B)) / mugamma
mu <- mu * mugamma
del <- sqrt(sum((A - A0) ^ 2) + sum((B - B0) ^ 2))
if (del < epsilon){
#cat("del = ", del , "\n")
break
}
}
## Final results
invD <- D
invD[D != 0] <- 1 / D[D != 0]
U <- A %*% diag(invD, nrow = length(invD)) # ginv(D)
V <- B %*% diag(invD, nrow = length(invD)) # ginv(D)
d0id <- which(colSums(abs(U)) == 0)
D[d0id] <- 0
d0id <- which(colSums(abs(V)) == 0)
D[d0id] <- 0
## some rounding##
D[abs(D) / sum(D) < tol] <- 0
#dorder <- order(D, decreasing = TRUE)
dorder <- 1:length(invD)
## D <- as.vector(d) # diag(nrow=nrank, ncol=nrank, as.vector(d[dorder]))
## Update estimated rank
nrank <- sum(D != 0)
## Reordering
U <- as.matrix(A[, dorder] %*% diag(invD, nrow = length(invD)))
V <- as.matrix(B[, dorder] %*% diag(invD, nrow = length(invD)))
drank <- max(nrank, 1)
U <- as.matrix(U[, 1:drank, drop = FALSE])
V <- as.matrix(V[, 1:drank, drop = FALSE])
D <- D[1:drank]
## Compute infomation criteria
## residual <- (Y-X%*%U%*%D%*%t(V))
residual <- (Y - X %*% U %*% diag(D, nrow = drank) %*% t(V))
sse <- sum(residual ^ 2)
dfu0 <- switch(
methodA,
lasso = sum(U != 0),
adlasso = sum(U != 0),
glasso = sum(apply(U, 1, function(a)
sum(a ^ 2)) != 0) * nrank,
adglasso = sum(apply(U, 1, function(a)
sum(a ^ 2)) != 0) * nrank
)
dfv0 <- switch(
methodB,
lasso = sum(V != 0),
adlasso = sum(V != 0),
glasso = sum(apply(V, 1, function(a)
sum(a ^ 2)) != 0) * nrank,
adglasso = sum(apply(V, 1, function(a)
sum(a ^ 2)) != 0) * nrank
)
df <- dfu0 * xrank / p + dfv0 - nrank ^ 2
BIC <- log(sse) + log(q * n) / q / n * df
GIC <- log(sse) + log(log(n * q)) * log(p * q) / q / n * df
AIC <- log(sse) + 2 / q / n * df
GCV <- sse / q / n / (1 - df / q / n) ^ 2
converged <- del <= epsilon
list(
# diff=diff,iter=iter,
ic = c(
GIC = GIC,
BIC = BIC,
AIC = AIC,
GCV = GCV
),
sse = sse,
df = df,
converged = converged,
U = U,
V = V,
D = D,
rank = nrank
)
}
sofar.lammax <- function(Y,
X,
U = NULL,
V = NULL,
D = NULL,
XX = NULL,
XY = NULL,
Yvec = NULL,
eigenXX = NULL,
nrank = 3,
su = NULL,
sv = NULL,
## lamA=0,lamB=0,lamD=0,
mu = 1,
mugamma = 1.2,
conv = 1e-3,
maxit = 100,
## inner.conv=1e-3,inner.iter=50,
methodA = c("lasso", "adlasso", "glasso", "adglasso")[2],
methodB = c("lasso", "adlasso", "glasso", "adglasso")[2],
WA = NULL,
WB = NULL,
Wd = NULL,
wgamma = 2)
{
tol <- 1e-2
lamA = 0
lamB = 0
lamD = 0
n <- nrow(Y)
p <- ncol(X)
q <- ncol(Y)
nr <- nrank
xrank <- sum(svd(X)$d > 1e-4)
if(is.null(su)) su <- rep(1,nrank)
# Su_inv <- diag(nrow=nrank, ncol=nrank)
# Su_inv_sqrt <- diag(nrow=nrank, ncol=nrank)
# }else{
# Su_inv <- diag(sqrt(su),nrow=nrank, ncol=nrank)
# Su_inv_sqrt <- diag(1/sqrt(su),nrow=nrank, ncol=nrank)
# }
if(is.null(sv)) sv <- rep(1,nrank)
# Sv_inv <- diag(nrow=nrank, ncol=nrank)
# Sv_inv_sqrt <- diag(nrow=nrank, ncol=nrank)
# }else{
# Sv_inv <- diag(sqrt(sv),nrow=nrank, ncol=nrank)
# Sv_inv_sqrt <- diag(1/sqrt(sv),nrow=nrank, ncol=nrank)
# }
if (is.null(U) | is.null(V) | is.null(D)) {
ini <- rrr.fit(Y, X, nrank = nrank, coefSVD = TRUE)
U <- t(sqrt(su)*t(ini$coefSVD$u))
V <- t(sqrt(sv)*t(ini$coefSVD$v))
D <- diag(ini$coefSVD$d/sqrt(su)/sqrt(sv), nrow = length(ini$coefSVD$d))
} else{
ini <- NULL
}
d <- diag(D)
A <- U %*% D
B <- V %*% D
O <- D
o <- diag(O)
GA <- matrix(nrow = p, ncol = nr, 0)
GB <- matrix(nrow = q, ncol = nr, 0)
GO <- matrix(nrow = nr, ncol = nr, 0)
## Adaptive weights
if (is.null(WA) | is.null(WB) | is.null(Wd)) {
if (is.null(ini))
ini <- rrr.fit(Y, X, nrank = nrank, coefSVD = TRUE)
WA <- switch(
methodA,
lasso = matrix(nrow = p, ncol = nrank, 1),
adlasso = ((
ini$coefSVD$u %*%
diag(ini$coefSVD$d/sqrt(sv), nrow = length(ini$coefSVD$d))
) + 1e-8) ^ (- wgamma),
glasso = rep(1, p),
adglasso = apply(ini$coefSVD$u
%*% diag(ini$coefSVD$d/sqrt(sv),
nrow = length(ini$coefSVD$d)),
1, function(a)
sqrt(sum(a ^ 2))) ^ {
-wgamma
}
)
WB <- switch(
methodB,
lasso = matrix(nrow = q, ncol = nrank, 1),
adlasso = ((
ini$coefSVD$v %*%
diag(ini$coefSVD$d/sqrt(su), nrow = length(ini$coefSVD$d))
) + 1e-8) ^ (- wgamma),
glasso = rep(1, q),
adglasso = apply(ini$coefSVD$v %*%
diag(ini$coefSVD$d/sqrt(su),
nrow = length(ini$coefSVD$d)),
1, function(a)
sqrt(sum(a ^ 2))) ^ {
-wgamma
}
)
if (substr(methodA, 1, 2) == "ad" |
substr(methodB, 1, 2) == "ad") {
if (is.null(ini))
ini <- rrr.fit(Y, X, nrank = nrank, coefSVD = TRUE)
Wd <- (ini$coefSVD$d / sqrt(su) / sqrt(sv) + 1e-8) ^ {
-wgamma
}
} else{
Wd <- rep(1, nrank)
}
}
##Compute some products for later use
if (is.null(XX))
XX <- crossprod(X) # t(X)%*%X
if (is.null(eigenXX))
eigenXX <- eigen(XX)$values[1]
if (is.null(XY))
XY <- crossprod(X, Y) # t(X)%*%Y
if (is.null(Yvec))
Yvec <- as.vector(Y)
lamA.max <- 0
lamB.max <- 0
lamD.max <- 0
iter <- 1
diff <- vector()
diff[1] <- conv * 2
while (diff[iter] > conv && iter < maxit) {
## Store initial values before iterations
U0 <- U
V0 <- V
D0 <- D
d0 <- d
A0 <- A
B0 <- B
O0 <- O
o0 <- o
GA0 <- GA
GB0 <- GB
GO0 <- GO
## L2 step
## 1. U step
## Note that only XY and XX are needed
XXmu <- XX + mu * diag(nrow = p)
## Yu <- XXsqrtm%*%(XY%*%V+mu*(A-GA))
XYu <- XY %*% V + mu * (A - GA)
##Xu <- XXsqrt
XXu <- XXmu
Du <- D
Ustep <- procrustes_RCpp(XYu,
XXu,
sqrt(su)*D,
rho2 = eigenXX + mu,
t((1/sqrt(su))*t(U)),
control = list(maxit = 50,
epsilon = 1e-3))
U <- t(sqrt(su)*t(Ustep$U))
## 2. V step
XYvsvd <- svd((t(XY) %*% U + mu * (B - GB)) %*% (sqrt(sv)*D))
V <- t(sqrt(sv)*tcrossprod(XYvsvd$v, XYvsvd$u))
## 3. D step
Xd1 <- matrix(nrow = n * q, ncol = nr)
Xd2 <- matrix(nrow = p * nr, ncol = nr)
Xd3 <- matrix(nrow = q * nr, ncol = nr)
Xd4 <- vector()
XU <- X %*% U
for (k in 1:nr) {
Xd1[, k] <- kronecker(V[, k], XU[, k])
ek <- as.vector(rep(0, nr))
ek[k] <- 1
Xd2[, k] <- kronecker(U[, k], ek)
Xd3[, k] <- kronecker(V[, k], ek)
}
Xd4 <- as.vector(diag(O - GO))
Xd <-
t(Xd1) %*% Xd1 + mu * (t(Xd2) %*% Xd2 + t(Xd3) %*% Xd3 + diag(nrow = nrank))
Yd <- t(Xd1) %*% Yvec +
mu * (t(Xd2) %*% as.vector(t(A - GA)) +
t(Xd3) %*% as.vector(t(B - GB)) + as.vector(diag(O - GO)))
d <- solve(Xd) %*% Yd
d[d < tol] <- 0
D <- diag(nrow = nr, ncol = nr, as.vector(d))
## If some d becomes zero, update U and V
U[, which(d == 0)] <- 0
V[, which(d == 0)] <- 0
## Thresholding step
A <- switch(
methodA,
lasso = softTH(U %*% D + GA, lamA / mu * WA),
adlasso = softTH(U %*% D + GA, lamA / mu * WA),
glasso = softrowTH(U %*% D + GA, lamA / mu * WA)$C,
adglasso = softrowTH(U %*% D + GA, lamA / mu * WA)$C
)
##Estimate lamA.max
lamA.max <- switch(
methodA,
lasso = max(abs((U %*% D + GA) * mu / WA)),
adlasso = max(abs((U %*% D + GA) * mu / WA)),
glasso = max(abs(lnorm(U %*% D + GA, 2) * mu /
WA)),
adglasso = max(abs(lnorm(U %*% D + GA, 2) * mu /
WA))
)
B <- switch(
methodB,
lasso = softTH(V %*% D + GB, lamB / mu * WB),
adlasso = softTH(V %*% D + GB, lamB / mu * WB),
glasso = softrowTH(V %*% D + GB, lamB / mu * WB)$C,
adglasso = softrowTH(V %*% D + GB, lamB / mu * WB)$C
)
lamB.max <- switch(
methodB,
lasso = max(abs((V %*% D + GB) * mu / WB)),
adlasso = max(abs((V %*% D + GB) * mu / WB)),
glasso = max(abs(lnorm(V %*% D + GB, 2) * mu /
WB)),
adglasso = max(abs(lnorm(V %*% D + GB, 2) * mu /
WB))
)
o <- softTH(diag(D + GO), lamD / mu * Wd)
lamD.max <- max(abs(diag(D + GO) * mu / Wd))
o[o < 0] <- 0
O <- diag(nrow = nr, ncol = nr, o)
## Dual step
GA <- (GA + (U %*% D - A)) / mugamma
GB <- (GB + (V %*% D - B)) / mugamma
GO <- (GO + (D - O)) / mugamma
mu <- mu * mugamma
iter <- iter + 1
## diff <- sqrt(sum((A%*%t(B)-A0%*%t(B0))^2))
diff[iter] <-
sqrt(sum((A - A0) ^ 2) + sum((B - B0) ^ 2) + sum((O - O0) ^ 2))
}
list(lamA.max = lamA.max,
lamB.max = lamB.max,
lamD.max = lamD.max)
}
sofar.path.reduced <- function(Y,
X,
XX = NULL,
XY = NULL,
Yvec = NULL,
eigenXX = NULL,
nrank = 1,
su = NULL,
sv = NULL,
lamA = 1,
lamB = 1,
lamD = 1,
ic = c("GIC", "BIC", "AIC", "GCV"),
modstr = list(),
init = list(U = NULL, V = NULL, D = NULL),
control = list()) {
p <- ncol(X)
q <- ncol(Y)
n <- nrow(Y)
nlamA <- length(lamA)
nlamB <- length(lamB)
nlamD <- length(lamD)
stopifnot(nlamA == nlamB & nlamB == nlamD)
if(is.null(su)) su <- rep(1,nrank)
# Su_inv <- diag(nrow=nrank, ncol=nrank)
# Su_inv_sqrt <- diag(nrow=nrank, ncol=nrank)
# }else{
# Su_inv <- diag(sqrt(su),nrow=nrank, ncol=nrank)
# Su_inv_sqrt <- diag(1/sqrt(su),nrow=nrank, ncol=nrank)
# }
if(is.null(sv)) sv <- rep(1,nrank)
# Sv_inv <- diag(nrow=nrank, ncol=nrank)
# Sv_inv_sqrt <- diag(nrow=nrank, ncol=nrank)
# }else{
# Sv_inv <- diag(sqrt(sv),nrow=nrank, ncol=nrank)
# Sv_inv_sqrt <- diag(1/sqrt(sv),nrow=nrank, ncol=nrank)
# }
modstr <- do.call("sofar.modstr", modstr)
control <- do.call("sofar.control", control)
## nrank <- modstr$nrank
nlam <- nlamA
ICpath <- array(dim = c(nlam, 4), NA)
Upath <- array(dim = c(nlam, p, nrank), 0)
Vpath <- array(dim = c(nlam, q, nrank), 0)
Dpath <- array(dim = c(nlam, nrank), 0)
Rpath <- rep(0, nlam)
## initial value
##init <- do.call("sofar.0", init)
U <- init$U
V <- init$V
D <- init$D
ini <- NULL
## Adaptive weights
WA <-
modstr$WA
WB <- modstr$WB
Wd <- modstr$Wd
wgamma <- modstr$wgamma
methodA <- control$methodA
methodB <- control$methodB
if (is.null(WA) || is.null(WB) || is.null(Wd)) {
if (is.null(ini))
ini <- rrr.fit(Y, X, nrank, coefSVD = TRUE)
WA <- switch(
methodA,
lasso = matrix(1., p, nrank),
adlasso = (ini$coefSVD$u %*%
diag(ini$coefSVD$d/sqrt(sv),
nrow = length(ini$coefSVD$d))) ^ (-wgamma),
glasso = rep(1., p),
adglasso = sqrt(rowSums((
ini$coefSVD$u %*%
diag(ini$coefSVD$d/sqrt(sv),
nrow = length(ini$coefSVD$d))
) ^ 2)) ^ (-wgamma)
)
WB <- switch(
methodB,
lasso = matrix(1., q, nrank),
adlasso = (ini$coefSVD$v %*%
diag(ini$coefSVD$d/sqrt(su),
nrow = length(ini$coefSVD$d))) ^ (-wgamma),
glasso = rep(1., q),
adglasso = sqrt(rowSums((
ini$coefSVD$v %*%
diag(ini$coefSVD$d/sqrt(su),
nrow = length(ini$coefSVD$d))
) ^ 2)) ^ (-wgamma)
)
if (substr(methodA, 1, 2) == "ad" ||
substr(methodB, 1, 2) == "ad") {
if (is.null(ini))
ini <- rrr.fit(Y, X, nrank, coefSVD = TRUE)
Wd <- (ini$coefSVD$d/sqrt(su)/sqrt(sv)) ^ (-wgamma)
} else {
Wd <- rep(1., nrank)
}
}
## initial
Upath[1, ,] <- U
Vpath[1, ,] <- V
Dpath[1,] <- D
Rpath[1] <- nrank
for (h in 1:nlam) {
hidx <- max(h - 1, 1)
U <- as.matrix(Upath[hidx, ,])
V <- as.matrix(Vpath[hidx, ,])
D <- Dpath[hidx,]
rfit <- Rpath[hidx]
U <- as.matrix(U[, 1:rfit])
V <- as.matrix(V[, 1:rfit])
D <- D[1:rfit]
## weights according to the rank
WA1 <- switch(
methodA,
lasso = as.matrix(WA[, 1:rfit]),
adlasso = as.matrix(WA[, 1:rfit]),
glasso = as.vector(WA),
adglasso = as.vector(WA)
)
WB1 <- switch(
methodB,
lasso = as.matrix(WB[, 1:rfit]),
adlasso = as.matrix(WB[, 1:rfit]),
glasso = as.vector(WB),
adglasso = as.vector(WB)
)
Wd1 <- Wd[1:rfit]
## modstr$nrank <- rfit
modstr$WA <- WA1
modstr$WB <- WB1
modstr$Wd <- Wd1
control$lamA <- lamA[h]
control$lamB <- lamB[h]
control$lamD <- lamD[h]
## Use warm start
fit <- sofar.fit(
Y,
X,
XX = XX,
XY = XY,
Yvec = Yvec,
eigenXX = eigenXX,
nrank = rfit,
su = su,
sv = sv,
modstr = modstr,
init = list(U = U, V = V, D = D),
control = control
)
U <- fit$U
V <- fit$V
D <- fit$D
rfit <- fit$rank
if (rfit == 0) {
ICpath[h:nlam, ] <- matrix(fit$ic,
nrow = nlam - h + 1,
ncol = 4,
byrow = TRUE)
Rpath[h:nlam] <- 0
break
}
ICpath[h, ] <- fit$ic
Upath[h, , seq_len(rfit)] <- U
Vpath[h, , seq_len(rfit)] <- V
Dpath[h, seq_len(rfit)] <- D
Rpath[h] <- rfit
}
minidx <- apply(ICpath, 2, which.min)
IC <- match.arg(ic)
## Which IC to use?
ICid <- switch(
IC,
GIC = minidx[1],
BIC = minidx[2],
AIC = minidx[3],
GCV = minidx[4]
)
## Note that minid are reported based on the original tuning sequences.
list(
Upath = Upath,
Vpath = Vpath,
Dpath = Dpath,
Rpath = Rpath,
ICpath = ICpath,
lam.id = minidx,
U = Upath[ICid, , 1:Rpath[ICid]],
V = Vpath[ICid, , 1:Rpath[ICid]],
D = Dpath[ICid, 1:Rpath[ICid]],
rank = Rpath[ICid]
)
}
sofar.cv <- function(Y,
X,
XX = NULL,
XY = NULL,
Yvec = NULL,
eigenXX = eigenXX,
nrank = 1,
su = NULL,
sv = NULL,
lamA = 1,
lamB = 1,
lamD = 1,
nfold = 5,
norder = NULL,
#fold.drop = 0,
modstr = list(),
init = list(U = NULL, V = NULL, D = NULL),
control = list())
{
p <- ncol(X)
q <- ncol(Y)
n <- nrow(Y)
nlamA <- length(lamA)
nlamB <- length(lamB)
nlamD <- length(lamD)
stopifnot(nlamA == nlamB & nlamB == nlamD)
nlam <- nlamA
if(is.null(sv)) su <- rep(1,nrank)
if(is.null(sv)) sv <- rep(1,nrank)
modstr <- do.call("sofar.modstr", modstr)
## init <- do.call("sofar.0", init)
control <- do.call("sofar.control", control)
## Adaptive weights are the same for different fold
WA <- modstr$WA
WB <- modstr$WB
Wd <- modstr$Wd
wgamma <- modstr$wgamma
methodA <- control$methodA
methodB <- control$methodB
ini <- NULL
if (is.null(WA) || is.null(WB) || is.null(Wd)) {
if (is.null(ini))
ini <- rrr.fit(Y, X, nrank, coefSVD = TRUE)
WA <- switch(
methodA,
lasso = matrix(1., p, nrank),
adlasso = (ini$coefSVD$u %*%
diag(ini$coefSVD$d/sqrt(sv),
nrow = length(ini$coefSVD$d))) ^ (-wgamma),
glasso = rep(1., p),
adglasso = sqrt(rowSums((
ini$coefSVD$u %*%
diag(ini$coefSVD$d/sqrt(sv),
nrow = length(ini$coefSVD$d))
) ^ 2)) ^ (-wgamma)
)
WB <- switch(
methodB,
lasso = matrix(1., q, nrank),
adlasso = (ini$coefSVD$v %*%
diag(ini$coefSVD$d/sqrt(su),
nrow = length(ini$coefSVD$d))) ^ (-wgamma),
glasso = rep(1., q),
adglasso = sqrt(rowSums((
ini$coefSVD$v %*%
diag(ini$coefSVD$d/sqrt(su),
nrow = length(ini$coefSVD$d))
) ^ 2)) ^ (-wgamma)
)
if (substr(methodA, 1, 2) == "ad" ||
substr(methodB, 1, 2) == "ad") {
if (is.null(ini))
ini <- rrr.fit(Y, X, nrank, coefSVD = TRUE)
Wd <- (ini$coefSVD$d/sqrt(su)/sqrt(sv)) ^ (-wgamma)
} else {
Wd <- rep(1., nrank)
}
modstr$WA <- WA
modstr$WB <- WB
modstr$Wd <- Wd
}
if (is.null(init$U) || is.null(init$V) || is.null(init$D)) {
init <- list(
U = t(sqrt(su)*t(ini$coefSVD$u)),
V = t(sqrt(sv)*t(ini$coefSVD$v)),
D = ini$coefSVD$d/sqrt(su)/sqrt(sv)
)
}
## Construct folds
ndel <- round(n / nfold)
if (is.null(norder))
norder <- sample(n)
nlam <- length(lamA)
cr_path <- array(NA, dim = c(nlam, nfold))
for (f in seq_len(nfold)) {
## determine cr sample
if (f != nfold) {
iddel <- norder[(1 + ndel * (f - 1)):(ndel * f)]
} else {
iddel <- norder[(1 + ndel * (f - 1)):n]
}
ndel <- length(iddel)
nf <- n - ndel
## idkeep <- (1:n)[-iddel]
Xf <- X[-iddel, ]
Xfdel <- X[iddel, ]
Yf <- Y[-iddel, ]
Yfdel <- Y[iddel, ]
XXf <- crossprod(Xf) # t(X)%*%X
XYf <- crossprod(Xf, Yf) # t(X)%*%Y
Yvecf <- as.vector(Yf)
eigenXXf <- eigen(XXf)$values[1]
fitf <- sofar.path.reduced(
Yf,
Xf,
XX = XXf,
XY = XYf,
Yvec = Yvecf,
eigenXX = eigenXXf,
nrank = nrank,
su = su,
sv = sv,
lamA = lamA,
lamB = lamB,
lamD = lamD,
ic = "GIC",
modstr = modstr,
control = control,
init = init
)
for (ll in 1:nlam) {
cr_path[ll, f] <- sum((
Yfdel - Xfdel %*% fitf$Upath[ll, ,] %*%
diag(fitf$Dpath[ll,], nrow = nrank) %*% t(fitf$Vpath[ll, ,])
) ^ 2)
}
}
index <- order(apply(cr_path, 2, sum))
crerr <- apply(cr_path[, index], 1, sum) / length(index) * nfold
minid <- which.min(crerr)
## Refit model with all data.
## modstr$WA <- WA; modstr$WB <- WB; modstr$Wd <- Wd
control$lamA <- lamA[minid]
control$lamB <- lamB[minid]
control$lamD <- lamD[minid]
## Initial values
## init <- list(U=fitf$Upath[minid,,],
## V=fitf$Vpath[minid,,],
## D=fitf$Dpath[minid,])
fitall <- sofar.path.reduced(
Y,
X,
XX = XX,
XY = XY,
Yvec = Yvec,
eigenXX = eigenXX,
ic = "GIC",
nrank = nrank,
su = su,
sv = sv,
lamA = lamA,
lamB = lamB,
lamD = lamD,
modstr = modstr,
init = init,
control = control
)
## fitall <- sofar.fit(Y, X, XX = XX, XY = XY,
## Yvec = Yvec, eigenXX = eigenXX,
## nrank = nrank, modstr = modstr,
## control = control, init = init)
list(
cr.path = cr_path,
cr.error = crerr,
lam.id = c(minid),
norder = norder,
## W = list(WA=WA, WB=WB, Wd=Wd),
Upath = fitall$Upath,
Vpath = fitall$Vpath,
Dpath = fitall$Dpath,
Rpath = fitall$Rpath,
U = fitall$Upath[minid, , 1:fitall$rank],
V = fitall$Vpath[minid, , 1:fitall$rank],
D = fitall$Dpath[minid, 1:fitall$rank],
## U = fitall$Upath[minid,,],
## V = fitall$Vpath[minid,,],
## D = fitall$Dpath[minid,],
## U = fitall$U,
## V = fitall$V,
## D = fitall$D,
rank = fitall$rank
)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.