## Kernel functions
##
## periodic_kernel
## get_col_inds_continuous_dicrete_vars_used
## pdtmvn_kernel
## compute_pdtmvn_kernel_bw_params_from_bw_eigen
## vectorize_params_pdtmvn_kernel
## update_theta_from_vectorized_theta_est_pdtmvn_kernel
## initialize_params_pdtmvn_kernel
#' Periodic kernel function
#'
#' @param x a vector of values at which to evaluate the kernel function
#' @param center a real number, center point for the kernel function
#' @param period kernel period
#' @param bw kernel bandwidth
#'
#' @return vector of the kernel function value at each point in x
periodic_kernel <- function(x, center, period, bw, log) {
result <- -0.5 * (sin(period * (x - center)) / bw)^2
if(log) {
return(result)
} else {
return(exp(result))
}
}
#' Create two integer vectors with indices of columns in x corresponding to
#' continuous variables and discrete variables. These integer vectors are
#' suitable for passing to functions from the pdtmvn function.
#'
#' @param x_colnames character vector with column names of x
#' @param continuous_vars character vector with names of columns in x that
#' should be treated as continuous variables
#' @param discrete_vars character vector with names of columns in x that
#' should be treated as discrete variables
#'
#' @return list with two components: continuous_vars is an integer vector of
#' columns in x corresponding to continuous variables and discrete_vars is
#' an integer vector of columns in x corresponding to discrete variables
get_col_inds_continuous_discrete_vars_used <- function(x_colnames,
continuous_vars,
discrete_vars) {
if(is.null(x_colnames)) {
stop("x must have column names")
}
col_inds_continuous_vars <- seq_along(x_colnames)[x_colnames %in% continuous_vars]
col_inds_discrete_vars <- seq_along(x_colnames)[x_colnames %in% discrete_vars]
return(list(
continuous_vars = col_inds_continuous_vars,
discrete_vars = col_inds_discrete_vars
))
}
#' Evaluate the kernel function given by the pdtmvn distribution.
#'
#' @param a matrix of values at which to evaluate the kernel function, with
#' column names specified. Each row is an observation, each column is an
#' observed variable.
#' @param center a real vector, center point for the kernel function
#' @param bw bandwidth matrix
#' @param bw_continuous the portion of bw corresponding to continuous variables
#' @param conditional_bw_discrete the Schur complement of the portion of bw
#' corresponding to discrete variables
#' @param conditional_center_discrete_offset_multiplier Sigma_dc Sigma_c^{-1}.
#' This is used in computing the mean of the underlying multivariate normal
#' distribution for the discrete variables conditioning on the continuous
#' variables.
#' @param continuous_vars character vector with names of variables that are to
#' be treated as continuous. May contain entries that do not appear in
#' colnames(x).
#' @param discrete_vars character vector with names of variables that are to be
#' treated as discrete. May contain entries that do not appear in
#' colnames(x)
#' @param discrete_var_range_fns a list with one entry for each element
#' of discrete_vars. Each entry is a named list of length 2; the element
#' named "a" is a character string with the name of a function that returns
#' a(x) for any real x, and the element named "b" is a character string with
#' the name of a function that returns b(x) for any real x.
#' @param lower Vector of lower truncation points
#' @param upper Vector of upper truncation points
#' @param log logical; if TRUE, return the log of the kernel function value
#' @param ... mop up extra arguments
#'
#' @return the value of the kernel function given by the pdtmvn distribution
#' at x.
pdtmvn_kernel <- function(x,
center,
bw,
bw_continuous,
conditional_bw_discrete,
conditional_center_discrete_offset_multiplier,
continuous_vars,
discrete_vars,
continuous_var_col_inds,
discrete_var_col_inds,
discrete_var_range_fns,
lower,
upper,
log,
...) {
if(length(dim(center)) > 0) {
center <- as.vector(as.matrix(center))
}
return(pdtmvn::dpdtmvn(x = x,
mean = center,
sigma = bw,
sigma_continuous = bw_continuous,
conditional_sigma_discrete = conditional_bw_discrete,
conditional_mean_discrete_offset_multiplier =
conditional_center_discrete_offset_multiplier,
lower = lower,
upper = upper,
continuous_vars = continuous_var_col_inds,
discrete_vars = discrete_var_col_inds,
discrete_var_range_fns = discrete_var_range_fns[discrete_vars],
log = log,
validate_level = 1L))
}
#' Simulate from the kernel function given by the pdtmvn distribution.
#'
#' @param n number of simulations to generate
#' @param a matrix of values at which to evaluate the kernel function, with
#' column names specified. Each row is an observation, each column is an
#' observed variable.
#' @param center a real vector, center point for the kernel function
#' @param bw bandwidth matrix
#' @param bw_continuous the portion of bw corresponding to continuous variables
#' @param conditional_bw_discrete the Schur complement of the portion of bw
#' corresponding to discrete variables
#' @param conditional_center_discrete_offset_multiplier Sigma_dc Sigma_c^{-1}.
#' This is used in computing the mean of the underlying multivariate normal
#' distribution for the discrete variables conditioning on the continuous
#' variables.
#' @param continuous_vars character vector with names of variables that are to
#' be treated as continuous. May contain entries that do not appear in
#' colnames(x).
#' @param discrete_vars character vector with names of variables that are to be
#' treated as discrete. May contain entries that do not appear in
#' colnames(x)
#' @param discrete_var_range_fns a list with one entry for each element
#' of discrete_vars. Each entry is a named list of length 2; the element
#' named "a" is a character string with the name of a function that returns
#' a(x) for any real x, and the element named "b" is a character string with
#' the name of a function that returns b(x) for any real x.
#' @param lower Vector of lower truncation points
#' @param upper Vector of upper truncation points
#' @param log logical; if TRUE, return the log of the kernel function value
#' @param ... mop up extra arguments
#'
#' @return the value of the kernel function given by the pdtmvn distribution
#' at x.
rpdtmvn_kernel <- function(n,
conditioning_obs,
center,
bw,
bw_continuous,
conditional_bw_discrete,
conditional_center_discrete_offset_multiplier,
continuous_vars,
discrete_vars,
continuous_var_col_inds,
discrete_var_col_inds,
discrete_var_range_fns,
lower,
upper,
log,
...) {
if(length(dim(center)) > 0) {
center_names <- colnames(center)
center <- as.vector(as.matrix(center))
names(center) <- center_names
}
return(pdtmvn::rpdtmvn(n = n,
x_fixed = conditioning_obs,
mean = center,
sigma = bw,
lower = lower,
upper = upper,
continuous_vars = continuous_var_col_inds,
discrete_vars = discrete_var_col_inds,
discrete_var_range_fns = discrete_var_range_fns,
validate_level = 1))
}
#' Compute the parameters bw, bw_continuous, conditional_bw_discrete, and
#' conditional_center_discrete_offset_multiplier for the pdtmvn_kernel function
#' from the eigen-decomposition of the bandwidth matrix.
#'
#' @param bw_evecs matrix whose columns are the eigenvectors of the bandwidth
#' matrix.
#' @param bw_evals vector of eigenvalues of the bandwidth matrix.
#' @param continuous_vars Vector containing column indices for continuous
#' variables.
#' @param discrete_vars Vector containing column indices for discrete variables.
#'
#' @return Named list with four components: bw, bw_continuous,
#' conditional_bw_discrete, and conditional_center_discrete_offset_multiplier
compute_pdtmvn_kernel_bw_params_from_bw_eigen <- function(bw_evecs,
bw_evals,
continuous_var_col_inds,
discrete_var_col_inds) {
bw <- bw_evecs %*% diag(bw_evals) %*% t(bw_evecs)
bw_params <- c(
list(bw = bw,
bw_evecs = bw_evecs,
bw_evals = bw_evals,
log_bw_evals = log(bw_evals)),
pdtmvn::compute_sigma_subcomponents(sigma = bw,
continuous_vars = continuous_var_col_inds,
discrete_vars = discrete_var_col_inds,
validate_level = 0)
)
names(bw_params)[names(bw_params) == "sigma_continuous"] <- "bw_continuous"
names(bw_params)[names(bw_params) == "conditional_sigma_discrete"] <-
"conditional_bw_discrete"
names(bw_params)[names(bw_params) == "conditional_mean_discrete_offset_multiplier"] <-
"conditional_center_discrete_offset_multiplier"
return(bw_params)
}
#' A function to vectorize the parameters of the pdtmvn_kernel and convert
#' to estimation scale.
#' @param theta_list parameters for the pdtmvn_kernel in list format
#' @param parameterization character; currently, only supported value is
#' "bw-diagonalized-est-eigenvalues"
#' @param ... mop up other arguments
#'
#' @return vector containing parameters that are estimated on a scale
#' suitable for numerical optimization
vectorize_params_pdtmvn_kernel <- function(theta_list, parameterization, ...) {
if(identical(parameterization, "bw-diagonalized-est-eigenvalues")) {
return(theta_list$log_bw_evals)
} else {
stop("Invalid parameterization for pdtmvn kernel function")
}
}
#' A function to unvectorize the parameters of the pdtmvn_kernel and convert
#' from estimation scale to scale actually used.
#'
#' @param theta_vector a numeric vector of parameters that are being optimized,
#' on a scale suitable for use in optim.
#' @param parameterization character; currently, only supported value is
#' "bw-diagonalized-est-eigenvalues"
#' @param bw_evecs square matrix with eigenvectors of the bandwidth matrix in
#' columns
#' @param continuous_vars character vector with variables names that are to be
#' treated as continuous
#' @param discrete_vars character vector with variables names that are to be
#' treated as discrete
#' @param ssr_control list of control parameters to ssr
#'
#' @return list of parameters to pdtmvn_kernel
update_theta_from_vectorized_theta_est_pdtmvn_kernel <- function(theta_est_vector, theta, parameterization) {
if(identical(parameterization, "bw-diagonalized-est-eigenvalues")) {
num_bw_evals <- ncol(theta$bw_evecs)
temp <- compute_pdtmvn_kernel_bw_params_from_bw_eigen(bw_evecs = theta$bw_evecs,
bw_evals = exp(theta_est_vector[seq_len(num_bw_evals)]),
theta$continuous_var_col_inds,
theta$discrete_var_col_inds)
theta[names(temp)] <- temp
return(list(
theta = theta,
num_theta_vals_used = num_bw_evals
))
} else {
stop("Invalid parameterization for pdtmvn kernel function")
}
}
#' Get initial parameter values for the pdtmvn_kernel
#'
#' @param parameterization Character vector of length 1; the parameterization to
#' use. Currently, the only supported option is
#' "bw-diagonalized-est-eigenvalues"
#' @param x a matrix of values used to initialize the parameter values for the
#' kernel function. Each row is an observation, each column is an
#' observed variable.
#' @param continuous_vars character vector with variables names that are to be
#' treated as continuous
#' @param discrete_vars character vector with variables names that are to be
#' treated as discrete
#' @param ssr_control list of control parameters to ssr
#' @param ... used to absorb other arguments in the function call
#'
#' @return list with initial values of parameters to the pdtmvn_kernel
initialize_params_pdtmvn_kernel <- function(parameterization,
x,
continuous_vars,
discrete_vars,
discrete_var_range_fns,
lower,
upper,
...) {
require(robust)
if(identical(parameterization, "bw-diagonalized-est-eigenvalues")) {
continuous_discrete_var_col_inds <-
get_col_inds_continuous_discrete_vars_used(colnames(x),
continuous_vars,
discrete_vars)
sample_cov_hat <- robust::covRob(x)$cov
sample_cov_eigen <- eigen(sample_cov_hat)
return(c(
compute_pdtmvn_kernel_bw_params_from_bw_eigen(bw_evecs = sample_cov_eigen$vectors,
bw_evals = sample_cov_eigen$values,
continuous_discrete_var_col_inds$continuous_vars,
continuous_discrete_var_col_inds$discrete_vars),
list(
continuous_vars = continuous_vars,
discrete_vars = discrete_vars,
continuous_var_col_inds = continuous_discrete_var_col_inds$continuous_vars,
discrete_var_col_inds = continuous_discrete_var_col_inds$discrete_vars,
discrete_var_range_fns = discrete_var_range_fns,
lower = lower[colnames(x)],
upper = upper[colnames(x)])
))
} else {
stop("Invalid parameterization for pdtmvn kernel function")
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.