Nothing
#' Sparse Discriminant Analysis solved via Accelerated Proximal Gradient
#'
#' Applies accelerated proximal gradient algorithm to
#' the optimal scoring formulation of sparse discriminant analysis proposed
#' by Clemmensen et al. 2011.
#'
#' @param Xt n by p data matrix, (not a data frame, but a matrix)
#' @param Yt n by K matrix of indicator variables (Yij = 1 if i in class j).
#' This will later be changed to handle factor variables as well.
#' Each observation belongs in a single class, so for a given row/observation,
#' only one element is 1 and the rest is 0.
#' @param Om p by p parameter matrix Omega in generalized elastic net penalty.
#' @param gam Regularization parameter for elastic net penalty.
#' @param lam Regularization parameter for l1 penalty, must be greater than zero.
#' @param q Desired number of discriminant vectors.
#' @param PGsteps Maximum number if inner proximal gradient algorithm for finding beta.
#' @param PGtol Stopping tolerance for inner APG method.
#' @param maxits Number of iterations to run
#' @param tol Stopping tolerance for proximal gradient algorithm.
#' @param selector Vector to choose which parameters in the discriminant vector will be used to calculate the
#' regularization terms. The size of the vector must be *p* the number of predictors. The
#' default value is a vector of all ones. This is currently only used for ordinal classification.
#' @param initTheta Option to set the initial theta vector, by default it is a vector of all ones
#' for the first theta.
#' @param bt Boolean to indicate whether backtracking should be used, default false.
#' @param L Initial estimate for Lipshitz constant used for backtracking.
#' @param eta Scalar for Lipshitz constant.
#' @param rankRed Boolean indicating whether Om is in factorized form, such that R^t*R = mO
#' @return \code{SDAAP} returns an object of \code{\link{class}} "\code{SDAAP}" including a list
#' with the following named components:
#'
#' \describe{
#' \item{\code{call}}{The matched call.}
#' \item{\code{B}}{p by q matrix of discriminant vectors.}
#' \item{\code{Q}}{K by q matrix of scoring vectors.}
#' \item{\code{subits}}{Total number of iterations in proximal gradient subroutine.}
#' \item{\code{totalits}}{Number coordinate descent iterations for all discriminant vectors}
#' }
#' @seealso \code{SDAAPcv}, \code{\link{SDAP}} and \code{\link{SDAD}}
#' @keywords internal
SDAAP <- function(Xt, ...) UseMethod("SDAAP")
#' @return \code{NULL}
#'
#' @rdname SDAAP
#' @method SDAAP default
SDAAP.default <- function(Xt, Yt, Om, gam, lam, q, PGsteps, PGtol, maxits, tol, selector = rep(1,dim(Xt)[2]), initTheta, bt=FALSE, L, eta, rankRed = FALSE, ...){
# Get training data size
nt <- dim(Xt)[1] # num. samples
p <- dim(Xt)[2] # num. features
K <- dim(Yt)[2] # num. classes
# Number of iterations logging data
subits <- 0
totalits <- rep(maxits,q)
if(length(selector) != dim(Xt)[2]){
stop('The length of selector must be the same as that of Xt')
}
# Structure to store elements for matrix A used
# later on, i.e. precomputed values for speed.
A <- structure(list(
flag = NA,
gom = NA,
X = NA,
n = NA,
A = NA
),
class = "Amat"
)
# Check if Omega is diagonal
if(rankRed == TRUE){
A$flag <- 0
# Omega is supplied in factorised form
A$X <- sqrt(1/nt)*Xt
A$gom <- sqrt(gam)*Om # Because Om is factored
A$n <- nt
alpha <- 1/(2*(norm(Xt, type="1")*norm(Xt, type="I")/nt + norm(A$gom, type="1")*norm(A$gom, type="I")))
}else if(norm(diag(diag(Om))-Om, type = "F") < 1e-15){
A$flag <- 1
A$gom <- gam*diag(Om)
A$X <- Xt
A$n <- nt
A$A <- 2*(crossprod(Xt)/nt + gam*Om)
alpha <- 1/(2*(norm(Xt, type="1")*norm(Xt, type="I")/nt + norm(diag(A$gom), type="I")))
}else{
A$flag <- 0
A$A <- 2*(crossprod(Xt)/nt + gam*Om)
alpha <- 1/(norm(A$A, type="F"))
}
L <- 1/alpha
L <- norm(diag(diag(Om*gam)),'I')+norm(Xt,'F')^2
origL <- L
D <- (1/nt)*crossprod(Yt)
R <- chol(D)
Q <- matrix(1,K,q)
B <- matrix(0,p,q)
#------------------------------------------------------
# Alternating direction method to update (theta,beta)
#------------------------------------------------------
for(j in 1:q){
L <- origL
###
# Initialization
###
Qj <- Q[,1:j]
Mj <- function(u){
return(u-Qj%*%(crossprod(Qj,D%*%u)))
}
# Initialize theta
theta <- matrix(stats::runif(K),nrow=K,ncol=1)
theta <- Mj(theta)
if(j == 1 & !missing(initTheta)){
theta=initTheta
}
theta <- theta/as.numeric(sqrt(crossprod(theta,D%*%theta)))
# Initialize beta
beta <- matrix(0,p,1)
if(norm(diag(diag(Om))-Om, type = "F") < 1e-15 & sum(selector) == length(selector)){
# Extract reciprocal of diagonal of Omega
ominv <- 1/diag(Om)
# Compute rhs of f minimizer system
rhs0 <- crossprod(Xt, (Yt%*%(theta/nt)))
rhs = Xt%*%((ominv/nt)*rhs0)
# Partial solution
tmp_partial = solve(diag(nt)+Xt%*%((ominv/(gam*nt))*t(Xt)),rhs)
# Finish solving for beta using SMW
beta = (ominv/gam)*rhs0 - 1/gam^2*ominv*(t(Xt)%*%tmp_partial)
}
for(its in 1:maxits){
# Compute coefficient vector for elastic net step
d <- 2*crossprod(Xt,Yt%*%(theta/nt))
# Update beta using proximal gradient step
b_old <- beta
if(bt == FALSE & rankRed==FALSE){
betaOb <- APG_EN2(A, d, beta, lam, alpha, PGsteps, PGtol, selector)
beta <- betaOb$x
subits <- subits + betaOb$k
}else if(rankRed == TRUE){
betaOb <- APG_EN2rr(A, d, beta, lam, alpha, PGsteps, PGtol, selector)
beta <- betaOb$x
}else{
betaOb <- APG_EN2bt(A, Xt, Om, gam, d, beta, lam, L, eta, PGsteps, PGtol, selector)
beta <- betaOb$x
}
# Update theta using the projected solution
if(norm(beta, type="2") > 1e-12){
# Update theta
b <- crossprod(Yt,Xt%*%beta)
y <- forwardsolve(t(R),b)
z <- backsolve(R,y)
tt <- Mj(z)
t_old <- theta
theta <- tt/sqrt(as.numeric(crossprod(tt, D)%*%tt))
# Update changes
db <- norm(beta-b_old, type="2")/norm(beta, type="2")
dt <- norm(theta-t_old, type="2")/norm(theta, type="2")
} else{
# Update b and theta
beta <- beta*0
theta <- theta*0
db <- 0
dt <- 0
}
if(max(db,dt) < tol){
totalits[j] <- its
# Converged
break
}
}
# Make the first argument be positive, this is to make the results
# more reproducible and consistent.
if(theta[1] < 0){
theta <- (-1)*theta
beta <- (-1)*beta
}
# Update Q and B
Q[,j] <- theta
B[,j] <- beta
}
totalits <- sum(totalits)
#Return B and Q in a SDAAP object
retOb <- structure(
list(call = match.call(),
B = B,
Q = Q,
subits = subits,
totalits = totalits),
class = "SDAAP")
return(retOb)
}
# TODO: Implement print, summary, predict and plot of SDAAP object
# Potentially also print for output from summary and predict.
# Skim over again to see if crossprod can be used for more speed.
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.