## Functions to set up the estimation routine for kcde
##
## create_kcde_control
## create_kcde_control_default
## get_default_kernel_components
## validate_kcde_control
#' Bundle user-specified settings into a kcde_control list for the kcde
#' function.
#'
#' The arguments are stored under their own names, in the order of the
#' signature. No validation is performed here. An argument whose value is
#' NULL (for example the default \code{par_packages}) gets no entry in the
#' returned list.
#'
#' @param X_names a character vector of length >= 1 containing names of
#'   variables in the data data frame to use in forming the lagged
#'   observation process used for calculating weights
#' @param y_names a character vector of length 1 containing the name of the
#'   variable in the data data frame to use as the target for prediction
#' @param time_name a character vector of length 1 containing the name of the
#'   variable in the data data frame to use as the time
#' @param prediction_horizons integer vector: the number of time steps between
#'   the last observation and the time at which we make a prediction
#' @param kernel_components a list with one component for each component of the
#'   kernel function.  Each component is a list with the following entries:
#'   - vars_and_lags: a data frame with two columns: "var_name" and
#'     "lag_value".  Each row specifies a combination of variable and lag
#'     that is included in this component of the kernel function.
#'   - kernel_fn: a function to evaluate the kernel
#'   - theta_fixed: a named list of parameters to kernel_fn whose values are
#'     held fixed (i.e., not estimated)
#'   - theta_est: a named list of parameters to kernel_fn whose values are
#'     to be estimated
#'   - initialize_theta_fn: a function to initialize both theta_fixed and
#'     theta_est
#'   - initialize_theta_args: a named list of arguments to
#'     initialize_theta_fn
#'   - vectorize_theta_est: a function that converts theta_est into an
#'     ordered vector on a scale suitable for passing as the first argument
#'     to optim.  Required to return a list with three components:
#'       (1) theta_est - vector of parameters to be estimated
#'       (2) lb - vector of lower bounds to theta_est
#'       (3) ub - vector of upper bounds to theta_est
#'   - update_theta_from_vectorized_theta_est: a function that updates
#'     theta_est (in list form) from theta_est (in vector form).
#' @param filter_control stored as given under \code{filter_control}.
#'   NOTE(review): its expected structure is not visible in this file --
#'   confirm against the code that consumes kcde_control.
#' @param crossval_buffer during cross-validation, the number of indices before
#'   the time at which we are making a prediction to drop from the "training
#'   examples".
#' @param prediction_inds_not_included an integer vector specifying indices of
#'   the data set that is provided in estimation that should not be included
#'   in the calculation of the loss.  The observations at these indices may
#'   still be included as conditioning variables used to make predictions at
#'   other times.
#' @param loss_fn the loss used to score predictions, stored as given.
#'   NOTE(review): whether a function object or a function name is expected
#'   is not visible in this file -- confirm against the consumer.
#' @param loss_fn_prediction_args stored as given.  NOTE(review): presumably
#'   arguments describing the predictions handed to loss_fn -- confirm.
#' @param loss_args a named list giving arguments to the loss function
#' @param variable_selection_method a character string specifying the method
#'   used to determine variable inclusion; defaults to "stepwise"
#' @param par_packages a character vector containing names of packages that need
#'   to be loaded in instances of R when computations are performed in
#'   parallel.
#' @param par_cores stored as given; defaults to 1L.  NOTE(review): presumably
#'   the number of cores to use for parallel computation -- confirm.
#' @param na.action a character string specifying how NA values should be
#'   handled; defaults to "na.omit"
#'
#' @return the (at this point, unvalidated) list of kcde_control parameters
create_kcde_control <- function(X_names,
                                y_names,
                                time_name,
                                prediction_horizons,
                                kernel_components,
                                filter_control,
                                crossval_buffer,
                                prediction_inds_not_included,
                                loss_fn,
                                loss_fn_prediction_args,
                                loss_args,
                                variable_selection_method = "stepwise",
                                par_packages = NULL,
                                par_cores = 1L,
                                na.action = "na.omit") {
  # Entries are created in this order, which is the order of the signature.
  field_names <- c(
    "X_names",
    "y_names",
    "time_name",
    "prediction_horizons",
    "kernel_components",
    "filter_control",
    "crossval_buffer",
    "prediction_inds_not_included",
    "loss_fn",
    "loss_fn_prediction_args",
    "loss_args",
    "variable_selection_method",
    "par_packages",
    "par_cores",
    "na.action"
  )

  kcde_control <- list()
  for (field in field_names) {
    # Element-wise assignment is deliberate: assigning NULL creates no entry,
    # whereas list(par_packages = NULL) would keep a NULL-valued entry.
    kcde_control[[field]] <- get(field, envir = environment(), inherits = FALSE)
  }

  return(kcde_control)
}
#' Assemble a list of kcde_control parameters for the kcde function with default
#' values
#'
#' The kernel components are obtained from get_default_kernel_components().
#' That function currently stops with a "not yet implemented" error, so this
#' function fails in the same way until it is implemented.
#'
#' @param X_names a character vector of length >= 1 containing names of
#'   variables in the data data frame to use in forming the lagged
#'   observation process used for calculating weights
#' @param y_names a character vector of length 1 containing the name of the
#'   variable in the data data frame to use as the target for prediction
#' @param data a data frame where rows are consecutive observations
#'
#' @return the list of kcde_control parameters, with entries X_names, y_names,
#'   kernel_components, loss_fn_name ("mase") and loss_fn_args (an empty list)
create_kcde_control_default <- function(X_names, y_names, data) {
  kcde_control <- list()

  kcde_control$X_names <- X_names
  kcde_control$y_names <- y_names

  # This function has no time_name to forward, so only the three arguments
  # that get_default_kernel_components() accepts are passed, by name.
  kcde_control$kernel_components <- get_default_kernel_components(
    X_names = X_names,
    y_names = y_names,
    data = data
  )

  # NOTE(review): create_kcde_control() stores the loss settings as loss_fn /
  # loss_args, while the entries here are loss_fn_name / loss_fn_args --
  # confirm which names the code consuming kcde_control reads.
  kcde_control$loss_fn_name <- "mase"
  kcde_control$loss_fn_args <- list()

  return(kcde_control)
}
#' Get default kernel functions based on a brief look at the data.  This is
#' unreliable.  Update to return periodic_kernel if X_names[i] == time_name?
#'
#' Not yet implemented: every call stops with an error and none of the
#' arguments are used.
#'
#' @param X_names a character vector of length >= 1 containing names of
#'   variables in the data data frame to use in forming the lagged
#'   observation process used for calculating weights
#' @param y_names a character vector of length 1 containing the name of the
#'   variable in the data data frame to use as the target for prediction
#' @param data a data frame where rows are consecutive observations
#' @param time_name (optional) a character vector of length 1 containing the
#'   name of the variable in the data data frame to use as the time.
#'   Defaults to NULL.
#'
#' @return a list of default parameters for kernel components -- probably all
#'   bad.  Currently never returns, because the function always stops.
get_default_kernel_components <- function(X_names,
                                          y_names,
                                          data,
                                          time_name = NULL) {
  stop("Function get_default_kernel_components is not yet implemented")
}
#' Placeholder for validating the kcde_control parameters for kcde.
#'
#' Validation is not implemented: the body performs no checks, does not use
#' any of its arguments, and never signals an error or warning.
#'
#' @param kcde_control a list of kcde_control parameters for kcde
#' @param X_names a character vector of length >= 1 containing names of
#'   variables in the data data frame to use in forming the lagged
#'   observation process used for calculating weights
#' @param y_names a character vector of length 1 containing the name of the
#'   variable in the data data frame to use as the target for prediction
#' @param time_name (optional) a character vector of length 1 containing the
#'   name of the variable in the data data frame to use as the time.
#' @param data a data frame where rows are consecutive observations
#'
#' @return NULL.  Once validation is implemented the intent is to either stop
#'   with an error or not.
validate_kcde_control <- function(kcde_control,
                                  X_names,
                                  y_names,
                                  time_name,
                                  data) {
  # A "validation not yet implemented" warning used to be drafted here but is
  # disabled:
  # warning("kcde kcde_control parameter validation not yet implemented")
  NULL
}
# (Stray text from a web copy of this file; not R code.)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.