# fasta.R — Fast Adaptive Shrinkage/Thresholding Algorithm
#' Fast Adaptive Shrinkage/Thresholding Algorithm
#'
#' \code{fasta} implements back-tracking with Barzilai-Borwein step size selection
#'
#' @param f function handle for computing the smooth part of the objective
#' @param gradf function handle for computing the gradient of objective
#' @param g function handle for computing the nonsmooth part of the objective
#' @param proxg function handle for computing proximal mapping
#' @param x0 initial guess
#' @param tau1 initial stepsize
#' @param max_iters maximum iterations before automatic termination
#' @param w lookback window for non-monotone line search
#' @param backtrack boolean to perform backtracking line search
#' @param recordIterates boolean to record iterate sequence
#' @param stepsizeShrink multiplier to decrease step size
#' @param eps_n epsilon to prevent normalized residual from dividing by zero
#' @return A list with components: \code{x} (the iterate with the best objective
#'   value), \code{objective} (objective values, including the starting point),
#'   \code{fVals} (smooth-term values), \code{totalBacktracks},
#'   \code{residual}, \code{taus} (stepsizes used), and \code{iterates}
#'   (the iterate sequence, or \code{NULL} unless \code{recordIterates=TRUE}).
#' @export
#' @examples
#'
#' #------------------------------------------------------------------------
#' # LEAST SQUARES: EXAMPLE 1 (SIMULATED DATA)
#' #------------------------------------------------------------------------
#'
#' set.seed(12345)
#' n <- 100
#' p <- 25
#' X <- matrix(rnorm(n*p),n,p)
#' beta <- matrix(rnorm(p),p,1)
#' y <- X%*%beta + rnorm(n)
#' beta0 <- matrix(0,p,1) # initial starting vector
#'
#' f <- function(beta){ 0.5*norm(X%*%beta - y, "F")^2 }
#' gradf <- function(beta){ t(X)%*%(X%*%beta - y) }
#' g <- function(beta) { 0 }
#' proxg <- function(beta, tau) { beta }
#' x0 <- double(p) # initial starting iterate
#' tau1 <- 10
#'
#' sol <- fasta(f,gradf,g,proxg,x0,tau1)
#' # Check KKT conditions
#' gradf(sol$x)
#'
#' #------------------------------------------------------------------------
#' # LASSO LEAST SQUARES: EXAMPLE 2 (SIMULATED DATA)
#' #------------------------------------------------------------------------
#'
#' set.seed(12345)
#' n <- 100
#' p <- 25
#' X <- matrix(rnorm(n*p),n,p)
#' beta <- matrix(rnorm(p),p,1)
#' y <- X%*%beta + rnorm(n)
#' beta0 <- matrix(0,p,1) # initial starting vector
#' lambda <- 10
#'
#' f <- function(beta){ 0.5*norm(X%*%beta - y, "F")^2 }
#' gradf <- function(beta){ t(X)%*%(X%*%beta - y) }
#' g <- function(beta) { lambda*norm(as.matrix(beta),'1') }
#' proxg <- function(beta, tau) { sign(beta)*(sapply(abs(beta) - tau*lambda,
#' FUN=function(x) {max(x,0)})) }
#' x0 <- double(p) # initial starting iterate
#' tau1 <- 10
#'
#' sol <- fasta(f,gradf,g,proxg,x0,tau1)
#' # Check KKT conditions
#' cbind(sol$x,t(X)%*%(y-X%*%sol$x)/lambda)
#'
#' #------------------------------------------------------------------------
#' # LOGISTIC REGRESSION: EXAMPLE 3 (SIMULATED DATA)
#' #------------------------------------------------------------------------
#'
#' set.seed(12345)
#' n <- 100
#' p <- 25
#' X <- matrix(rnorm(n*p),n,p)
#' y <- sample(c(0,1),nrow(X),replace=TRUE)
#' beta <- matrix(rnorm(p),p,1)
#' beta0 <- matrix(0,p,1) # initial starting vector
#' f <- function(beta) { -t(y)%*%(X%*%beta) + sum(log(1+exp(X%*%beta))) } # objective function
#' gradf <- function(beta) { -t(X)%*%(y-plogis(X%*%beta)) } # gradient
#' g <- function(beta) { 0 }
#' proxg <- function(beta, tau) { beta }
#' x0 <- double(p) # initial starting iterate
#' tau1 <- 10
#'
#' sol <- fasta(f,gradf,g,proxg,x0,tau1)
#' # Check KKT conditions
#' gradf(sol$x)
#'
#' #------------------------------------------------------------------------
#' # LASSO LOGISTIC REGRESSION: EXAMPLE 4 (SIMULATED DATA)
#' #------------------------------------------------------------------------
#'
#' set.seed(12345)
#' n <- 100
#' p <- 25
#' X <- matrix(rnorm(n*p),n,p)
#' y <- sample(c(0,1),nrow(X),replace=TRUE)
#' beta <- matrix(rnorm(p),p,1)
#' beta0 <- matrix(0,p,1) # initial starting vector
#' lambda <- 5
#'
#' f <- function(beta) { -t(y)%*%(X%*%beta) + sum(log(1+exp(X%*%beta))) } # objective function
#' gradf <- function(beta) { -t(X)%*%(y-plogis(X%*%beta)) } # gradient
#' g <- function(beta) { lambda*norm(as.matrix(beta),'1') }
#' proxg <- function(beta, tau) { sign(beta)*(sapply(abs(beta) - tau*lambda,
#' FUN=function(x) {max(x,0)})) }
#' x0 <- double(p) # initial starting iterate
#' tau1 <- 10
#'
#' sol <- fasta(f,gradf,g,proxg,x0,tau1)
#' # Check KKT conditions
#' cbind(sol$x,-gradf(sol$x)/lambda)
fasta <- function(f,gradf,g,proxg,x0,tau1,max_iters=1e2,w=10,backtrack=TRUE,recordIterates=FALSE,stepsizeShrink=0.5,eps_n=1e-15) {
  ## Allocate memory for per-iteration diagnostics
  residual <- double(max_iters)        # norm(Dx)/tau: gradient estimate, ~0 at a solution
  normalizedResid <- double(max_iters) # residual scaled by the size of its constituent terms
  taus <- double(max_iters)            # stepsize actually used at each iteration
  fVals <- double(max_iters)           # value of the smooth term f at each iterate
  objective <- double(max_iters + 1)   # full objective f+g (entry 1 is the starting point)
  totalBacktracks <- 0                 # cumulative number of backtracking steps
  backtrackCount <- 0                  # backtracks within the current iteration

  ## Initialize state at the starting point
  x1 <- x0
  d1 <- x1
  f1 <- f(d1)
  fVals[1] <- f1
  gradf1 <- gradf(d1)
  if (recordIterates) {
    iterates <- matrix(0, length(x0), max_iters + 1)
    iterates[, 1] <- x1
  } else {
    iterates <- NULL
  }

  ## Bookkeeping for the non-monotone method
  maxResidual <- -Inf          # largest residual seen; used for stopping diagnostics
  minObjectiveValue <- Inf     # best objective seen: we return the best iterate, not the last
  bestObjectiveIterate <- x1   # defined up front so the return is safe even if no
                               # iteration improves the objective (e.g. NaN objective)
  objective[1] <- f1 + g(x0)

  for (i in seq_len(max_iters)) {
    ## Shift iterates: "0" denotes step i, "1" will become step i+1
    x0 <- x1
    ## Flatten the gradient to a plain vector: 1x1/px1 matrices leaking into
    ## scalar arithmetic cause "non-conformable arrays" errors downstream.
    gradf0 <- c(gradf1)
    tau0 <- tau1

    ## Forward-backward step: x_{i+1} = prox_g(x_i - tau * gradf(x_i), tau)
    x1hat <- x0 - tau0 * gradf0
    x1 <- proxg(x1hat, tau0)

    Dx <- c(x1 - x0)
    d1 <- x1
    f1 <- f(d1)

    ## Non-monotone backtracking line search: shrink tau until f at the new
    ## point is below the max of the last w recorded values plus a quadratic
    ## model bound.  The 1e-12 slack quenches floating-point rounding error.
    if (backtrack) {
      M <- max(fVals[max(i - w, 1):max(i - 1, 1)])
      backtrackCount <- 0
      while (f1 - 1e-12 > M + sum(Dx * gradf0) + 0.5 * sum(Dx^2) / tau0 &&
             backtrackCount < 20) {
        tau0 <- tau0 * stepsizeShrink      # shrink stepsize and redo the FBS step
        x1hat <- x0 - tau0 * gradf0
        x1 <- proxg(x1hat, tau0)
        d1 <- x1
        f1 <- f(d1)
        Dx <- c(x1 - x0)
        backtrackCount <- backtrackCount + 1
      }
      totalBacktracks <- totalBacktracks + backtrackCount
    }

    ## Record diagnostics for this iteration
    taus[i] <- tau0
    residual[i] <- sqrt(sum(Dx^2)) / tau0  # gradient estimate; zero at a solution
    maxResidual <- max(maxResidual, residual[i])
    ## Normalized residual: discrepancy between the two derivative terms,
    ## divided by the size of those terms (eps_n guards against 0/0).
    normalizer <- max(sqrt(sum(gradf0^2)), sqrt(sum(c(x1 - x1hat)^2)) / tau0) + eps_n
    normalizedResid[i] <- residual[i] / normalizer
    fVals[i] <- f1
    objective[i + 1] <- f1 + g(x1)
    newObjectiveValue <- objective[i + 1]
    if (recordIterates) {
      iterates[, i + 1] <- x1
    }
    if (newObjectiveValue < minObjectiveValue) {
      ## The method is non-monotone: remember the best solution seen so far
      bestObjectiveIterate <- x1
      minObjectiveValue <- newObjectiveValue
    }

    ## Barzilai-Borwein (spectral) stepsize for the next iteration.
    gradf1 <- gradf(d1)
    Dg <- c(gradf1) + c(x1hat - x0) / tau0   # = gradf(x_{i+1}) - gradf(x_i)
    dotprod <- sum(Dx * Dg)                  # scalar dot product (not a 1x1 matrix)
    tau_s <- sum(Dx^2) / dotprod             # "steepest descent" BB rule
    tau_m <- dotprod / sum(Dg^2)             # "minimum residual" BB rule
    tau_m <- max(tau_m, 0)
    if (abs(dotprod) < 1e-15) break          # stagnated: Dx or Dg numerically zero
    if (2 * tau_m > tau_s) {                 # adaptive combination of the two rules
      tau1 <- tau_m
    } else {
      tau1 <- tau_s - 0.5 * tau_m
    }
    if (tau1 <= 0 || is.infinite(tau1) || is.nan(tau1)) {
      tau1 <- tau0 * 1.5                     # invalid BB step: mildly grow the stepsize
    }
  }

  if (recordIterates) {
    iterates <- iterates[, 1:(i + 1), drop = FALSE]
  }
  list(x = bestObjectiveIterate, objective = objective[1:(i + 1)],
       fVals = fVals[1:i], totalBacktracks = totalBacktracks,
       residual = residual[1:i], taus = taus[1:i], iterates = iterates)
}
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.