## This file contains functions that handle lambda and b searches
## Functions:
## lambdabsearch
## bsearch
## lambdasearch
## lambdaline (lambdasearch in the old KRLS)
## looloss
## This function searches for both lambda and b
#' @export
lambdabsearch <- function(y,
X,
control) {
if(is.null(control$lambdarange)) {
if (!is.null(control$brange))
stop("Grid search for b only works with fixed lambda or lambda grid search.")
## Joint optimization using BFGS of lambda and b
fit.hyper <- optim(par=log(c(control$lambdastart, control$bstart)), lambdab.fn,
X=X, y=y, folds=length(control$chunks),
chunks = control$chunks, ctrl = control,
vcov = FALSE,
control=list(trace = T, abstol = 1e-4), method="BFGS")
lambda <- exp(fit.hyper$par[1])
b <- exp(fit.hyper$par[2])
} else {
if (!is.null(control$bstart))
stop("Numerically optimizing b does not work with a grid search for lambda.")
## Grid search over lambda and b
hypergrid <- as.matrix(expand.grid(control$lambdarange, control$brange))
hyperMSE <- NULL
for(i in 1:nrow(hypergrid)){
hyperMSE[i] <- lambdab.fn(par = log(hypergrid[i, ]), X=X,
y=y, folds=length(control$chunks),
chunks = control$chunks, ctrl = control)
}
hyper <- hypergrid[which.min(hyperMSE), ]
lambda <- hyper[1]
b <- hyper[2]
}
return(list(lambda=lambda,
b = b))
}
## This function governs searching for lambda with a fixed b
#' @export
lambdasearch <- function(y,
X,
Kdat,
control,
b) {
if (control$loss == "leastsquares") {
## Least squares golden section search
## todo: just replace with optimize? test output + efficiency
if (control$weight) {
lambda <- lambdaline(y=y, D=Kdat$D, U=Kdat$U, w=control$w, tol=control$tol, noisy = control$printlevel > 0)
} else {
lambda <- lambdaline(y=y, D=Kdat$D, U=Kdat$U, tol=control$tol, noisy = control$printlevel > 0)
}
} else {
if (is.null(control$lambdarange)) {
## Logistic optimize for lambda
fit.lambda <- optimize(lambdab.fn, interval=log(control$lambdainterval),
Kdat = Kdat, y=y, folds=length(control$chunks),
chunks = control$chunks, ctrl = control,
b = b, tol = .Machine$double.eps^0.2,
vcov = FALSE)
# Test for closeness in log scale or else this test is too sensitive in smaller regions of lambda
solution_on_interval <- abs(fit.lambda$minimum - log(control$lambdainterval)) < 2*.Machine$double.eps^.5
if (solution_on_interval[1])
warning("Lambda solution too close to lower bound of lambdainterval; please decrease lower bound.")
if (solution_on_interval[2])
warning("Lambda solution too close to upper bound of lambdainterval; please increase upper bound.")
fit.lambda$par <- fit.lambda$minimum
if (control$printlevel > 0) {
print(fit.lambda)
}
lambda <- exp(fit.lambda$par)
} else {
## line search for lambda
lambdaMSE <- NULL
for(i in 1:length(control$lambdarange)){
lambdaMSE[i] <- lambdab.fn(par = log(control$lambdarange[i]), Kdat = Kdat,
y=y, folds=length(control$chunks),
chunks = control$chunks, ctrl = control,
b = b)
}
lambda <- control$lambdarange[which.min(lambdaMSE)]
}
}
return(lambda)
}
## This function searches for just b
#' @export
bsearch <- function(y,
X,
control,
lambda) {
if(is.null(control$brange)) {
fit.b <- optimize(lambdab.fn, interval=log(control$binterval),
Kdat = Kdat, X=X, y=y, folds=length(control$chunks),
chunks = control$chunks, ctrl = control,
lambda = lambda,
vcov = FALSE)
b <- exp(fit.b$minimum)
} else {
bMSE <- NULL
for(i in 1:length(control$brange)){
if(control$brange[i] < 0) stop("All bs must be positive")
bMSE[i] <- lambdab.fn(par = log(control$brange[i]), X=X,
y=y, folds=length(control$chunks),
chunks = control$chunks, ctrl = control,
#b = b,
lambda = lambda)
b <- control$brange[which.min(bMSE)]
}
}
return(b)
}
## This computes the RMSPE using 'folds' cross-validation, fitting a new model
## for each subset of the data, but not refitting the kernel matrix unless looking for b
#' @export
lambdab.fn <- function(par = NULL,
y = NULL,
X = NULL,
Kdat = NULL,
folds = NULL,
chunks = NULL,
b = NULL,
lambda = NULL,
vcov = NULL,
ctrl = NULL) {
if(is.null(c(lambda, b))) {
lambda <- exp(par[1])
b <- exp(par[2])
Kdat <- generateK(X=X,
b=b,
control=ctrl)
} else {
if (is.null(lambda)) {
lambda <- exp(par[1])
}
if (is.null(b)) {
b <- exp(par)
Kdat <- generateK(X=X,
b=b,
control=ctrl)
}
}
pars <- rep(0, ncol(Kdat$U) + 1)
loss <- 0
for(j in 1:folds){
fold <- chunks[[j]]
UFold <- Kdat$U[-fold, , drop = F] # to make sure it stays a matrix, which CPP demands
out <- getDhat(par = pars,
y=y[-fold],
U=UFold,
D=Kdat$D,
w=ctrl$w[-fold],
lambda=lambda,
ctrl=ctrl,
printopt=ctrl$printlevel > 1)
if(ctrl$loss == "leastsquares") {
yhat <- Kdat$U[fold, ] %*% out$dhat
loss <- loss + sum((y[fold] - yhat)^2)
} else if (ctrl$loss == "logistic") {
yhat <- logistic(Kdat$U[fold, , drop = F], out$dhat, out$beta0hat)
yhat[yhat==1]=1-1e-12
yhat[yhat==0]=0+1e-12
loss <- loss - sum(y[fold]*log(yhat)+(1-y[fold])*log(1-yhat))
}
}
if(ctrl$loss == "leastsquares") {
loss <- sqrt(loss/length(y))
} else if(ctrl$loss == 'logistic') {
loss <- loss/length(y)
}
if(ctrl$printlevel > 0){
if(!is.null(ctrl$bstart)) cat(paste0("b: ", b, "\n"))
cat(paste0("lambda: ", lambda, "\n"))
cat(paste0("loss: ", loss, "\n"))
}
return(loss)
}
## Lambda search for KRLS
#' @export
lambdaline <-
function(Lbound=NULL,
Ubound=NULL,
y=NULL,
U=NULL,
D=NULL,
w=NULL,
tol=NULL,
noisy=FALSE){
n <- length(y)
if(is.null(tol)){
tol <- 10^-3 * n
} else {
stopifnot(is.vector(tol),
length(tol)==1,
is.numeric(tol),
tol>0)
}
# get upper bound starting value
if(is.null(Ubound)){
Ubound <- n
while(sum(D / (D + Ubound)) < 1){
Ubound <- Ubound-1
}
} else {
stopifnot(is.vector(Ubound),
length(Ubound)==1,
is.numeric(Ubound),
Ubound>0)
}
# get lower bound starting value
if(is.null(Lbound)){
q <- which.min(abs(D - (max(D)/1000)))
#Lbound <- 0
Lbound = .Machine$double.eps #CJH: to avoid Inf in next statement
while(sum(D / (D + Lbound)) > q){
Lbound <- Lbound+.05
}
} else {
stopifnot(is.vector(Lbound),
length(Lbound)==1,
is.numeric(Lbound),
Lbound>=0)
}
# create new search values
X1 <- Lbound + (.381966)*(Ubound-Lbound)
X2 <- Ubound - (.381966)*(Ubound-Lbound)
# starting LOO losses
S1 <- looloss(lambda=X1,y=scale(y),U=U, D=D,w=w)
S2 <- looloss(lambda=X2,y=scale(y),U=U, D=D,w=w)
if(noisy){cat("Lbound:",Lbound,"X1:",X1,"X2:",X2,"Ubound:",Ubound,"S1:",S1,"S2:",S2,"\n") }
while(abs(S1-S2)>tol){ # terminate if difference between S1 and S2 less than tolerance
# update steps and use caching
if(S1 < S2){
Ubound <- X2
X2 <- X1
X1 <- Lbound + (.381966)*(Ubound-Lbound)
S2 <- S1
S1 <- looloss(lambda=X1,y=scale(y),U=U, D=D,w=w)
} else { #S2 < S1
Lbound <- X1
X1 <- X2
X2 <- Ubound - (.381966)*(Ubound-Lbound)
S1 <- S2
S2 <- looloss(lambda=X2,y=scale(y),U=U, D=D,w=w)
}
if(noisy){cat("Lbound:",Lbound,"X1:",X1,"X2:",X2,"Ubound:",Ubound,"S1:",S1,"S2:",S2,"\n") }
}
out <- ifelse(S1<S2,X1,X2)
if(noisy){cat("Lambda:",out,"\n")}
return(invisible(out))
}
## looloss for krls
#' @export
looloss <-
function(y=NULL,K=NULL,D=NULL,U=NULL,w=NULL,lambda=NULL){
if(is.null(w)) {
Le <- solve_for_d_ls(y=y,D=D,U=U,lambda=lambda)$Le
} else {
Le <- solve_for_d_ls_w(y=y,D=D,U=U,w=w,lambda=lambda)$Le
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.