## Functions for interaction between kcde estimation and prediction routines and kernel functions
##
## initialize_theta
## extract_vectorized_theta_est_from_theta
## update_theta_from_vectorized_theta_est
## compute_kernel_values
## simulate_values_from_product_kernel
#' Initialize parameter values.
#'
#' @param prev_theta previous theta values before update to variables and lags
#' @param updated_vars_and_offsets lags after update
#' @param update_var_name character; the name of the variable added/removed from the model
#' @param update_offset_value integer; the offset that was added/removed from the model
#' @param update_offset_type integer; the type of the offset that was added/removed from the model
#' @param data data matrix
#' @param kcde_control list of control parameters for kcde
#'
#' @return list of theta parameters
initialize_theta <- function(prev_theta,
updated_vars_and_offsets,
update_var_name,
update_offset_value,
update_offset_type,
data,
kcde_control) {
theta <- lapply(seq_along(kcde_control$kernel_components),
function(ind) {
updated_var_and_offset_in_current_component <- any(sapply(seq_along(update_var_name),
function(update_ind) {
any(kcde_control$kernel_components[[ind]]$vars_and_offsets$var_name == update_var_name[update_ind] &
kcde_control$kernel_components[[ind]]$vars_and_offsets$offset_value == update_offset_value[update_ind] &
kcde_control$kernel_components[[ind]]$vars_and_offsets$offset_type == update_offset_type[update_ind])
}))
# if(updated_var_and_offset_in_current_component ||
# is.null(prev_theta[[ind]])) {
if(updated_var_and_offset_in_current_component) {
potential_cols_in_component <-
kcde_control$kernel_components[[ind]]$vars_and_offsets$combined_name
cols_used <- colnames(data) %in% potential_cols_in_component
if(any(cols_used)) {
fn_name <- kcde_control$kernel_components[[ind]]$
initialize_kernel_params_fn
fn_args <- kcde_control$kernel_components[[ind]]$
initialize_kernel_params_args
fn_args$total_num_vars <- nrow(updated_vars_and_offsets)
fn_args$update_var_name <- update_var_name
fn_args$update_offset_value <- update_offset_value
fn_args$update_offset_type <- update_offset_type
fn_args$kcde_control <- kcde_control
fn_args$x <- data[, cols_used, drop=FALSE]
## the business with temp_prev_theta here is necessary
## because on the first call for a given kernel component,
## prev_theta may be NULL, but on later calls, prev_theta
## will have already been initialized with the values in
## theta_fixed
temp_prev_theta <- prev_theta[[ind]]
temp_prev_theta[
names(kcde_control$kernel_components[[ind]]$theta_fixed)
] <- kcde_control$kernel_components[[ind]]$theta_fixed
fn_args$prev_theta <- temp_prev_theta
return(do.call(fn_name, fn_args))
} else {
return(NULL)
}
} else {
return(prev_theta[[ind]])
}
}
)
return(theta)
}
#' Get vectors of lower and upper bounds for theta parameters to be used in
#' calls to optim.
#'
#' @param theta list of parameters theta that are being estimated
#' @param kcde_control list of control parameters for kcde
#'
#' @return list with two components: lower and upper, giving vectors
#' with lower and upper bounds for possible parameter values
#' Initialize parameter values.
get_theta_optim_bounds <- function(theta,
kcde_control) {
lower <- NULL
upper <- NULL
for(ind in seq_along(kcde_control$kernel_components)) {
## parameters that are being estimated
if(!is.null(theta[[ind]])) {
temp <- do.call(
kcde_control$kernel_components[[ind]]$
get_theta_optim_bounds_fn,
c(list(theta = theta[[ind]]),
kcde_control$kernel_components[[ind]]$
get_theta_optim_bounds_args)
)
lower <- c(lower, temp$lower)
upper <- c(upper, temp$upper)
}
}
return(list(
lower = lower,
upper = upper
))
}
#' Extract a vector of parameter values that are to be estimated from theta,
#' on the estimation scale.
#'
#' @param theta_list kernel parameters theta in list form
#' @param lags list representing combinations of variables and lags
#' included in the model
#' @param kcde_control control parameters for the kcde fit
#'
#' @return numeric vector with parameter values
extract_vectorized_theta_est_from_theta <- function(theta,
vars_and_offsets,
kcde_control) {
theta_vector <- c()
for(ind in seq_along(kcde_control$kernel_components)) {
## parameters that are being estimated
if(!is.null(theta[[ind]])) {
theta_vector <- c(theta_vector, do.call(
kcde_control$kernel_components[[ind]]$vectorize_kernel_params_fn,
c(list(theta_list = theta[[ind]],
vars_and_offsets = vars_and_offsets),
kcde_control$kernel_components[[ind]]$vectorize_kernel_params_args)
))
}
}
return(theta_vector)
}
#' Convert theta from vector form to list form
#'
#' @param theta_est_vector numeric vector of kernel parameters theta that are
#' being estimated, on estimation scale.
#' @param kcde_control control parameters for the kcde fit
#'
#' @return list of lists of parameter values -- outer list has one component
#' for each kernel function, inner list has one component
#' for each parameter used in the corresponding kernel function
update_theta_from_vectorized_theta_est <- function(theta_est_vector,
theta,
kcde_control) {
theta_vector_component_start_ind <- 1L
for(ind in seq_along(kcde_control$kernel_components)) {
## parameters that are being estimated
if(!is.null(theta[[ind]])) {
temp <- do.call(
kcde_control$kernel_components[[ind]]$
update_theta_from_vectorized_theta_est_fn,
c(list(theta_est_vector = theta_est_vector[
seq(from = theta_vector_component_start_ind,
to = length(theta_est_vector))],
theta = theta[[ind]]),
kcde_control$kernel_components[[ind]]$
update_theta_from_vectorized_theta_est_args)
)
theta_vector_component_start_ind <- theta_vector_component_start_ind +
temp$num_theta_vals_used
theta[[ind]] <- temp$theta
}
}
return(theta)
}
#' Compute kernel values measuring the similarity of each row in the
#' train_lagged_obs data frame to the prediction_lagged_obs.
#'
#' @param train_obs a data frame with lagged observation vectors computed
#' from the training data
#' @param prediction_obs a data frame with the lagged observation vector
#' computed from the prediction data. It is assumed that
#' prediction_lagged_obs contains only one row.
#' @param theta a list with one component for each component of the kernel
#' function. This component is a named list with arguments to the
#' corresponding kernel function.
#' @param kcde_control a list of kcde_control parameters for kcde
#' @param log boolean; if TRUE (default), return kernel values on the log scale
compute_kernel_values <- function(train_obs,
prediction_obs,
kernel_components,
theta,
log = TRUE) {
if(!(nrow(train_obs) == 1L || nrow(prediction_obs) == 1L)) {
stop("In call to compute_kernel_values, either train_obs or prediction_obs must have exactly 1 row.")
}
## create a matrix of log kernel values by component
## rows correspond to time points in train_obs, columns to components of
## the kernel function
result_length <- max(nrow(train_obs), nrow(prediction_obs))
log_kernel_component_values <- matrix(0,
nrow = result_length,
ncol = length(kernel_components))
for(ind in seq_along(kernel_components)) {
combined_names_in_component <-
kernel_components[[ind]]$vars_and_offsets$combined_name
col_names <- colnames(train_obs)[
colnames(train_obs) %in% combined_names_in_component]
if(length(col_names) > 0) {
## assemble arguments to kernel function
kernel_fn_args <- theta[[ind]]
kernel_fn_args$x <- prediction_obs[, col_names, drop = FALSE]
kernel_fn_args$center <- train_obs[, col_names, drop = FALSE]
kernel_fn_args$log <- TRUE
## call kernel function and store results
log_kernel_component_values[, ind] <-
do.call(kernel_components[[ind]]$kernel_fn,
kernel_fn_args)
}
}
## return on scale requested by user
## these computations assume product kernel --
## if we're doing something else, change apply(..., sum)
if(log) {
return(apply(log_kernel_component_values, 1, sum))
} else {
return(exp(apply(log_kernel_component_values, 1, sum)))
}
}
#' Simulate values from (conditional) kernel.
#'
#' @param train_obs a data frame with lagged observation vectors computed
#' from the training data
#' @param prediction_obs a data frame with the lagged observation vector
#' computed from the prediction data. It is assumed that
#' prediction_lagged_obs contains only one row.
#' @param theta a list with one component for each component of the kernel
#' function. This component is a named list with arguments to the
#' corresponding kernel function.
#' @param kcde_control a list of kcde_control parameters for kcde
#' @param log boolean; if TRUE (default), return kernel values on the log scale
simulate_values_from_product_kernel <- function(n,
conditioning_obs,
center,
kernel_components,
theta) {
conditioning_obs_missing <- missing(conditioning_obs) || is.null(conditioning_obs)
if(!(conditioning_obs_missing || nrow(conditioning_obs) == 1L) || nrow(center) != 1L) {
stop("In call to simulate_values_from_product_kernel, conditioning_obs and center must have exactly 1 row.")
}
## create a matrix to contain simulated values
## rows correspond to simulations, columns to variables simulated
simulated_values <- matrix(NA,
nrow = n,
ncol = ncol(center))
colnames(simulated_values) <- colnames(center)
for(ind in seq_along(kernel_components)) {
combined_names_in_component <-
kernel_components[[ind]]$vars_and_offsets$combined_name
center_col_names <- colnames(center)[
colnames(center) %in% combined_names_in_component]
if(conditioning_obs_missing) {
conditioning_col_names <- NULL
} else {
conditioning_col_names <- colnames(conditioning_obs)[
colnames(conditioning_obs) %in% combined_names_in_component]
}
## Fill in conditioning observations if there are any from this kernel component
## that are in the center vector.
if(length(conditioning_col_names) > 0 &&
any(conditioning_col_names %in% center_col_names)) {
simulated_values[, conditioning_col_names[conditioning_col_names %in% center_col_names]] <-
rep(as.vector(as.matrix(conditioning_obs[, conditioning_col_names[conditioning_col_names %in% center_col_names]])), each = n)
}
## Only need to sample from this kernel component if there are any variables used that
## - are included in this kernel component, and
## - aren't conditioning variables
if(length(center_col_names) > 0 && any(!(center_col_names %in% conditioning_col_names))) {
## assemble arguments to kernel function
rkernel_args <- theta[[ind]]
rkernel_args$n <- n
if(!conditioning_obs_missing) {
rkernel_args$conditioning_obs <-
conditioning_obs[, conditioning_col_names, drop = FALSE]
}
rkernel_args$center <- center[, center_col_names, drop = FALSE]
## call kernel function and store results
simulated_values[, center_col_names] <-
do.call(kernel_components[[ind]]$rkernel_fn,
rkernel_args)[, center_col_names]
}
}
return(simulated_values)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.