Nothing
bwSelect <- function(data,
type,
level,
bwSeq,
bwFolds, # number of training rounds (or folds in CV)
bwFoldsize, # should default to n_var / bwFolds (then proper stratified CV, but very slow for big datasets)
modeltype,
pbar,
... # arguments for mvar/mgm, passed through by tvmar
)
{
# -------------------- Input Checks -------------------
args <- list(...)
# Input checks specific for mvar()
if(modeltype == 'mvar') {
if(is.null(args$lags)) stop('No lags specified')
if(any(args$lags<1)) stop('Specified lags have to be in {1, 2, ..., n -1}')
if(any(round(args$lags)!=args$lags)) stop('Specified lags have to be positive integers')
}
if(any(bwSeq < 0)) stop('All bandwidth values have to be positive.')
if(missing(bwFolds)) stop('No number of folds specified.')
if(bwFolds<0) stop('Number of folds has to be a positive integer')
if(bwFolds!=round(bwFolds)) stop('Number of folds has to be a positive integer')
if(missing(bwFoldsize)) stop('No fold size specified.')
if(bwFoldsize<0) stop('Fold size has to be a positive integer')
if(bwFolds!=round(bwFolds)) stop('Fold size has to be a positive integer')
# Input checks specific for mvar() / mgm() are passed down to these functions
# ----- Fill in Defaults -----
# browser()
if(missing(bwFolds)) bwFolds <- NULL
if(missing(bwFoldsize)) bwFoldsize <- NULL
if(missing(pbar)) pbar <- TRUE
if(is.null(args$saveData)) args$saveData <- FALSE
if(is.null(args$k)) args$k <- k <- 2
if(is.null(args$overparameterize)) args$overparameterize <- FALSE
# ----- Compute Aux Variables -----
n <- nrow(data)
if(modeltype == 'mvar') n_var <- n - max(args$lags) # this is how many rows there are after transforming the data
p <- ncol(data)
n_bw <- length(bwSeq) # length of bandwidth path
# ----- Checks -----
if(missing(bwSeq)) stop('No bandwidth sequence specified')
if(any(bwSeq <= 0)) stop('All entries in the bandwidth sequence have to be strictly positive')
# ----- Create Output Structure -----
bwSelectObj <- list('call' = NULL,
'bwModels' = NULL,
'fullErrorFolds' = NULL,
'fullError' = NULL,
'meanError' = NULL,
'testsets' = NULL,
'zeroweights' = NULL)
# ----- Copy The Call -----
bwSelectObj$call <- list('data' = NULL,
'type' = type,
'level' = level,
'bwSeq' = bwSeq,
'bwFolds' = bwFolds,
'bwFoldsize' = bwFoldsize,
'modeltype' = modeltype,
'pbar' = pbar)
if(args$saveData) bwSelectObj$call$data <- data
# ----- Progress Bar Business -----
# if(is.null(args$pbar)) args$pbar <- TRUE
# Set up Progress bar
if(pbar==TRUE) pb_bw <- txtProgressBar(min = 0, max = n_bw, initial = 0, char="-", style = 3)
# -------------------- Estimate Path -------------------
l_bw_models <- vector('list', length = n_bw) # Storage
l_performance <- list()
# !!!! IF EBIC REACTIVATED: needs argument 'nEstpoints' !!!!
# if(bwSel == 'EBIC') {
#
# warning('The EBIC does NOT select the optimal bandwidth. It is only implemented for research purposes. Use cross validation instead.')
#
# estpoints <- seq(0, n_var, length = nEstpoints)
#
# # Set up Storage
# l_performance <- vector('list', length = n_bw)
# dummy_estpoints <- vector('list', length = nEstpoints)
# dummy_p <- vector('list', length = p)
# dummy_estpoints <- lapply(dummy_estpoints, function(x) dummy_p)
# l_performance <- lapply(l_performance, function(x) dummy_estpoints)
#
#
# for(i in 1:n_bw) {
#
# l_bw_models[[i]] <- tvmvar(data = data,
# type = type,
# level = level,
# estpoints = estpoints,
# bandwidth = bwSeq[i],
# pbar,
# ...)
#
#
# # Get the p x estpoints x n_bw EBICs out of the tvmvar objects
# for(v in 1:p) {
# for(j in 1:nEstpoints) {
# l_performance[[i]][[j]][[v]] <- l_bw_models[[i]]$tvmodels[[j]]$nodeModels[[v]]$EBIC
# }
# }
#
#
# # Update Progress Bar
# if(args$pbar==TRUE) setTxtProgressBar(pb_bw, i)
#
# }
#
# # ----- Evaluate -----
#
# # Collapse across nodes
# l_perf_time <- lapply(l_performance, function(x) {
# lapply(x, function(y) mean(unlist(y)))
# })
#
# # Collapse across nodes & time points
# l_perf_mean <- lapply(l_perf_time, function(x) mean(unlist(x)))
#
#
# }
# -------------------- Estimate Path: VAR model -------------------
if(modeltype == 'mvar') {
# Define Testset in each fold: we take every kth object in some distance and move the sequence by 1 in each fold
l_testsets <- list()
l_zero_weights <- list()
for(fold in 1:bwFolds) {
l_testsets[[fold]] <- round(seq(fold, n_var - bwFolds + fold, length = bwFoldsize)) # nice!
l_zero_weights[[fold]] <- rep(1, n_var)
l_zero_weights[[fold]] [l_testsets[[fold]]] <- 0
}
# Storage
l_performance <- list()
l_bw_models <- list()
for(i in 1:n_bw) {
# Cross validation scheme
l_foldModels <- list()
l_foldPerform <- list()
for(fold in 1:bwFolds) {
# Fit model on training dataset fold
l_foldModels[[fold]] <- tvmvar(data = data, # full data, test cases are deleted via setting weights to zero
type = type,
level = level,
estpoints = l_testsets[[fold]] / n_var, # since we use [0,1] interval since mgm 1.2-3
bandwidth = bwSeq[i],
pbar = FALSE, # switch off progress bar in tvmvar
zero_weights = l_zero_weights[[fold]], # vector indicating zero weights for test cases
saveModels = TRUE, # otherwise we can't make any predictions
signInfo = FALSE,
...)
# Make Predictions at test-locations
l_foldPerform[[fold]] <- bwSelPredict(data = data,
type = type,
level = level,
obj = l_foldModels[[fold]],
test = l_testsets[[fold]],
lags = args$lags,
modeltype = modeltype,
overparameterize = args$overparameterize,
consec = l_foldModels[[fold]]$call$consec,
k = k,
...)
}
l_bw_models[[i]] <- l_foldModels
# Compute mean performance per Fold on maximal level: time x nodes
# Fill in Performance
l_performance[[i]] <- l_foldPerform
# Update Progress Bar
if(pbar==TRUE) setTxtProgressBar(pb_bw, i)
}
}
# -------------------- Estimate Path: MGM -------------------
if(modeltype == 'mgm') {
# Define Testset in each fold: we take every kth object in some distance and move the sequence by 1 in each fold
l_testsets <- list()
l_zero_weights <- list()
for(fold in 1:bwFolds) {
l_testsets[[fold]] <- round(seq(fold, n - bwFolds + fold, length = bwFoldsize)) # now normalized
l_zero_weights[[fold]] <- rep(1, n)
l_zero_weights[[fold]] [l_testsets[[fold]]] <- 0
}
# Storage
l_performance <- list()
l_bw_models <- list()
for(i in 1:n_bw) {
# Cross validation scheme
l_foldModels <- list()
l_foldPerform <- list()
for(fold in 1:bwFolds) {
# Fit model on training dataset fold
l_foldModels[[fold]] <- tvmgm(data = data, # full data, test cases are deleted via setting weights to zero
type = type,
level = level,
estpoints = l_testsets[[fold]] / n, # since we use [0,1] interval since mgm 1.2-3
bandwidth = bwSeq[i],
pbar = FALSE, # switch off progress bar in tvmvar
zero_weights = l_zero_weights[[fold]], # vector indicating zero weights for test cases
saveModels = TRUE, # otherwise we can't make predictions
signInfo = FALSE,
...)
# Make Predictions at test-locations
l_foldPerform[[fold]] <- bwSelPredict(data = data,
type = type,
level = level,
obj = l_foldModels[[fold]],
test = l_testsets[[fold]],
lags = args$lags,
modeltype = modeltype,
overparameterize = args$overparameterize,
k = args$k,
...)
}
l_bw_models[[i]] <- l_foldModels
# Compute mean performance per Fold on maximal level: time x nodes
# Fill in Performance
l_performance[[i]] <- l_foldPerform
# Update Progress Bar
if(pbar==TRUE) setTxtProgressBar(pb_bw, i)
}
}
# ----- Evaluate: Take mean across folds -----
# Collapse across folds
l_perf_mean <- lapply(l_performance, function(x) {
mean(unlist(lapply(x, function(y) y$error_mean)))
})
l_perf_all <- lapply(l_performance, function(x) {
dum <- lapply(x, function(y) y$errors)
n_dum <- length(dum)
Reduce('+', dum) / n_dum
})
# -------------------- Output -------------------
# Estimated Models
if(is.null(args$saveModels)) {
bwSelectObj$bwModels <- NULL
} else if(args$saveModels) {
bwSelectObj$bwModels <- l_bw_models
} else {
bwSelectObj$bwModels <- NULL
}
# Testsets
bwSelectObj$testsets <- l_testsets
bwSelectObj$zeroweights <- l_zero_weights
# Error Output
bwSelectObj$fullErrorFolds <- l_performance
bwSelectObj$fullError <- l_perf_all
bwSelectObj$meanError <- unlist(l_perf_mean)
names(bwSelectObj$meanError) <- bwSeq
class(bwSelectObj) <- c(modeltype, 'bwSelect')
return(bwSelectObj)
} # eoF
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.