# Ridge aggregation rule with automatic calibration of smoothing parameters
ridgeCalib <- function(y, experts, grid.lambda = 1, w0 = NULL, gamma = 2,
training = NULL, quiet = FALSE) {
experts <- as.matrix(experts)
N <- ncol(experts) # Number of experts
T <- nrow(experts) # Number of instants
# Uniform intial weight vector if unspecified
if (is.null(w0)) {
w0 <- matrix(1/N, ncol = N)
}
if (is.null(grid.lambda)) {
grid.lambda <- 1
}
if (is.null(gamma)) {
gamma <- 2
}
# Smoothing parameter grid
nlambda <- length(grid.lambda)
grid.lambda <- matrix(grid.lambda, nrow = nlambda)
cumulativeLoss <- rep(0, nlambda)
weights <- matrix(0, ncol = N, nrow = T) # Matrix of weights formed by the mixture
prediction <- rep(0, T) # Vector of predictions formed by the mixing algorithm
if (!is.null(training)) {
At <- training$At
bt <- training$bt
bestlambda <- training$bestlambda
wlambda <- training$wlambda
w0 <- training$w0
cumulativeLoss <- training$cumulativeLoss
T0 <- training$T
} else {
At <- list()
for (k in 1:nlambda){
At[[k]] <- diag(0,N) / grid.lambda[k]
}
bt <- rep(0, N)
bestlambda <- floor(nlambda)/2 + 1 # We start with the parameter in the middle of the grid
wlambda <- array(w0, dim = c(N, nlambda)) # Weight matrix proposed by each Ridge(lambda) where lambda is a parameter of the grid
T0 <- 0
}
lambda <- rep(grid.lambda[bestlambda], T)
if (! quiet) steps <- init_progress(T)
for (t in 1:T) {
if (! quiet) update_progress(t, steps)
# Weights, prediction formed by Ridge(lambda[t]) where lambda[t] is the learning
# rate calibrated online
weights[t, ] <- wlambda[, bestlambda]
prediction[t] <- experts[t, ] %*% weights[t, ]
lambda[t] <- grid.lambda[bestlambda]
# Weights, predictions formed by each Ridge(lambda) for lambda in the grid
# 'grid.lambda'
cumulativeLoss <- cumulativeLoss + c(experts[t, ] %*% wlambda - y[t])^2
# Grid update **************
bestlambda <- order(cumulativeLoss)[1] # find the best smoothing rate lambda in the grid
bt <- bt + y[t] * experts[t, ]
# Parameter update
for (k in 1:nlambda){
a <- At[[k]] %*% experts[t, ]
At[[k]] <- At[[k]] - a %*% t(a) / c(1 + experts[t,] %*% a)
}
# Expand the grid if the best parameter lies on an extremity
if (bestlambda == nlambda) {
newlambda <- grid.lambda[bestlambda] * gamma^(1:3)
grid.lambda <- c(grid.lambda, newlambda)
for (k in 1:length(newlambda)) {
perfnewlambda <- tryCatch(ridge(y = c(training$oldY, y[1:t]), experts = rbind(training$oldexperts,
matrix(experts[1:t, ], ncol = N)), lambda = newlambda[k], w0 = w0, quiet = TRUE),
error = function(e) {
list(prediction = rep(0, t))
})
newcumulativeLoss <- sum((perfnewlambda$prediction - c(training$oldY, y[1:t]))^2)
At[[nlambda + k]] <- perfnewlambda$training$At
cumulativeLoss <- c(cumulativeLoss, newcumulativeLoss)
}
nlambda <- nlambda + length(newlambda)
}
if (bestlambda == 1) {
newlambda <- grid.lambda[bestlambda]/gamma^(1:3)
nlambda <- nlambda + length(newlambda)
bestlambda <- bestlambda + length(newlambda)
for (k in 1:length(newlambda)) {
grid.lambda <- c(newlambda[k], grid.lambda)
perfnewlambda <- tryCatch(ridge(c(training$oldY, y[1:t]), experts = rbind(training$oldexperts,
matrix(experts[1:t, ], ncol = N)), lambda = newlambda[k], w0 = w0, quiet = TRUE),
error = function(e) {
list(prediction = rep(NA, t))
})
newcumulativeLoss <- sum((perfnewlambda$prediction - c(training$oldY, y[1:t]))^2)
cumulativeLoss <- c(newcumulativeLoss, cumulativeLoss)
At = c(list(perfnewlambda$training$At),At)
}
}
if (nlambda!=ncol(wlambda)){
wlambda <- matrix(0, nrow = N, ncol = nlambda)
}
for (k in 1:nlambda) {
wlambda[, k] <- At[[k]] %*% (grid.lambda[k] * w0 + bt)
}
# the smoothing parameter has to big large enough in order to have invertible
# design matrix
lambda.min <- which(!(is.na(wlambda[1, ])))[1]
bestlambda <- max(lambda.min, bestlambda)
}
if (! quiet) end_progress()
object <- list(model = "Ridge", loss.type = list(name = "square"), coefficients = wlambda[,
bestlambda])
object$parameters <- list(lambda = c(lambda[1:T]), grid.lambda = c(grid.lambda))
object$weights <- weights
object$prediction <- prediction
object$training <- list(T = T0 + T, wlambda = wlambda, w0 = w0, At = At, bt = bt,
bestlambda = bestlambda, cumulativeLoss = cumulativeLoss, grid.loss = cumulativeLoss/(T0 +
T), oldexperts = rbind(training$oldexperts, experts), oldY = c(training$oldY,
y))
return(object)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.