R/RcppExports.R
In kernelPSI: Post-Selection Inference for Nonlinear Variable Selection

Documented in adaFOHSIC adaQ FOHSIC forwardQ HSIC quadHSIC sampleH

# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

#' adaptively selects a subset of kernels in a forward fashion.
#'
#' This function is similar to the \code{\link{FOHSIC}} function. The only
#' difference lies in the adaptive selection of the number of causal kernels.
#' First, similarly to \code{\link{FOHSIC}}, the order of selection of the
#' \eqn{n} kernels in \code{K} is determined, and then, the size of the subset
#' of ordered kernels is chosen. The size is chosen as to maximize the overall
#' association with the kernel L.
#'
#' @param K list of kernel similarity matrices
#' @param L kernel similarity matrix for the outcome
#'
#' @return a list where the the first item \code{selection} is the order of
#' selection of all kernels in the list \code{K} and the second item is the
#' number of selected kernels.
#'
#' @examples
#' n <- 50
#' p <- 20
#' K <- replicate(5, matrix(rnorm(n*p), nrow = n, ncol = p), simplify = FALSE)
#' L <- matrix(rnorm(n*p), nrow = n, ncol = p)
#' K <-  sapply(K, function(X) return(X %*% t(X) / dim(X)[2]), simplify = FALSE)
#' L <-  L %*% t(L) / p
#' adaS <- adaFOHSIC(K, L)
#' print(names(adaS) == c("selection", "n"))
#'
#' @export
adaFOHSIC <- function(K, L) {
    .Call(`_kernelPSI_adaFOHSIC`, K, L)
}

#' models the forward selection of the kernels for the adaptive variant
#'
#' Similarly to the fixed variant, the adaptive selection of the
#' kernels in a forward fashion can also be modeled with a set of
#' quadratic constraints. The constraints for adaptive selection can be split
#' into two subsets. The first subset encodes the order of selection of the
#' kernels, while the second subset encodes the selection of the number of the
#' kernels. The two subsets are equally sized (\code{length(K) - 1}) and are
#' sequentially included in the output list.
#'
#' @param K list kernel similarity matrices
#' @param select integer vector containing the order of selection of the kernels
#' in \code{K}. Typically, the \code{selection} field of the output of
#' \code{\link{FOHSIC}}.
#' @param n number of selected kernels. Typically, the \code{n} field of the
#' output of \code{\link{adaFOHSIC}}.
#'
#' @return list of matrices modeling the quadratic constraints of the
#' adaptive selection event
#'
#' @references Loftus, J. R., & Taylor, J. E. (2015). Selective inference in
#' regression models with groups of variables.
#'
#' @examples
#' n <- 50
#' p <- 20
#' K <- replicate(8, matrix(rnorm(n*p), nrow = n, ncol = p), simplify = FALSE)
#' K <-  sapply(K, function(X) return(X %*% t(X) / dim(X)[2]), simplify = FALSE)
#' L <- matrix(rnorm(n*p), nrow = n, ncol = p)
#' L <-  L %*% t(L) / p
#' adaS <- adaFOHSIC(K, L)
#' listQ <- adaQ(K, select = adaS[["selection"]], n = adaS[["n"]])
#' @export
adaQ <- function(K, select, n) {
    .Call(`_kernelPSI_adaQ`, K, select, n)
}

#' selects a fixed number of kernels which are most associated with the
#' outcome kernel.
#'
#' This function implements a forward algorithm for kernel selection. In the
#' first step, the kernel which maximizes the HSIC measure with the outcome
#' kernel \code{L} is selected. In the subsequent iterations, the kernel which,
#' combined with the selected kernels maximizes the HSIC measure is selected.
#' For the sum kernel combination rule, the forward algorithm can be
#' simplified. The kernels which maximize the HSIC measure with the kernel
#' \code{L} are selected in a descending order.
#'
#' \code{\link{FOHSIC}} implements the forward algorithm with a predetermined
#' number of kernels \code{mKernels}. If the exact number of causal kernels is
#' unavailable, the adaptive version \code{\link{adaFOHSIC}} should be
#' preferred.
#'
#' @param K list of kernel similarity matrices
#' @param L kernel similarity matrix for the outcome
#' @param mKernels number of kernels to be selected
#'
#' @return an integer vector containing the indices of the selected kernels
#'
#' @examples
#' n <- 50
#' p <- 20
#' K <- replicate(5, matrix(rnorm(n*p), nrow = n, ncol = p), simplify = FALSE)
#' L <- matrix(rnorm(n*p), nrow = n, ncol = p)
#' K <-  sapply(K, function(X) return(X %*% t(X) / dim(X)[2]), simplify = FALSE)
#' L <-  L %*% t(L) / p
#' selection <- FOHSIC(K, L, 2)
#'
#' @export
FOHSIC <- function(K, L, mKernels = 1L) {
    .Call(`_kernelPSI_FOHSIC`, K, L, mKernels)
}

#' models the forward selection event of a fixed number of kernels as a
#' succession of quadratic constraints
#'
#' The selection of the kernels with the forward algorithm implemented in
#' \code{\link{FOHSIC}} can be represented as a set of quadratic constraints.
#' This is owed to the quadratic form of the HSIC criterion. In this function,
#' we determine the matrices of the corresponding constraints. The output is a
#' list of matrices where the order is identical to the order of selection
#' of the kernels. The matrices are computed such the associated constraint is
#' nonnegative. For a length \eqn{n} of the list K, the total number of
#' constraints is \eqn{n - 1}.
#'
#' @param K list kernel similarity matrices
#' @param select integer vector containing the indices of the selected kernels
#'
#' @return list of matrices modeling the quadratic constraints of the
#' selection event
#'
#' @examples
#' n <- 50
#' p <- 20
#' K <- replicate(5, matrix(rnorm(n*p), nrow = n, ncol = p), simplify = FALSE)
#' K <-  sapply(K, function(X) return(X %*% t(X) / dim(X)[2]), simplify = FALSE)
#' listQ <- forwardQ(K, select = c(4, 1))
#' @export
forwardQ <- function(K, select) {
    .Call(`_kernelPSI_forwardQ`, K, select)
}

#' Computes the HSIC criterion for two given kernels
#'
#' The Hilbert-Schmidt Independence Criterion (HSIC) is a measure of independence
#' between two random variables. If characteristic kernels are used for both
#' variables, the HSIC is zero iff the variables are independent. In this
#' function, we implement an unbiased estimator for the HSIC measure. Specifically,
#' for two positive-definite kernels \eqn{K} and \eqn{L} and a sample size
#' \eqn{n}, the unbiased HSIC estimator is:
#' \deqn{HSIC(K, L) = \frac{1}{n(n-3)} \left[trace(KL) + \frac{1^\top K11^\top L 1}{(n-1)(n-2)}- \frac{2}{n-2}1^\top KL\right]}
#'
#' @param K first kernel similarity matrix
#' @param L second kernel similarity matrix
#'
#' @return an unbiased estimate of the HSIC measure.
#'
#' @references Song, L., Smola, A., Gretton, A., Borgwardt, K., & Bedo, J.
#' (2007). Supervised Feature Selection via Dependence Estimation.
#' https://doi.org/10.1145/1273496.1273600
#'
#' @examples
#' n <- 50
#' p <- 20
#' X <- matrix(rnorm(n*p), nrow = n, ncol = p)
#' Y <- matrix(rnorm(n*p), nrow = n, ncol = p)
#' K <-  X %*% t(X) / p
#' L <-  Y %*% t(Y) / p
#' uHSIC <- HSIC(K, L)
#'
#' @export
HSIC <- function(K, L) {
    .Call(`_kernelPSI_HSIC`, K, L)
}

#' Determines the quadratic form of the HSIC unbiased estimator
#'
#' For a linear kernel of the outcome \eqn{L = Y^\top Y}, the unbiased HSIC
#' estimator implemented in \code{\link{HSIC}} can be expressed as a quadratic
#' form of the outcome \eqn{Y} i.e. \eqn{HSIC(K, L) = Y^\top Q(K) Y}. Here,
#' the matrix \eqn{Q} only depends on the kernel similarity matrix \eqn{K}.
#'
#' @param K kernel similarity matrix
#'
#' @return the matrix of the HSIC estimator quadratic form
#'
#' @examples
#' n <- 50
#' p <- 20
#' X <- matrix(rnorm(n*p), nrow = n, ncol = p)
#' K <-  X %*% t(X) / p
#' Q <- quadHSIC(K)
#' @export
quadHSIC <- function(K) {
    .Call(`_kernelPSI_quadHSIC`, K)
}

#' samples within the acceptance region defined by the kernel selection event
#'
#' To approximate the distribution of the test statistics, we iteratively
#' sample replicates of the response in order to generate replicates
#' of the test statistics. The response replicates are iteratively sampled
#' within the acceptance region of the selection event. The goal of the
#' constrained sampling is to obtain a valid post-selection distribution of
#' the test statistic. To perform the constrained sampling, we develop a hit-and-run
#' sampler based on the hypersphere directions algorithm (see references).
#'
#' Given the iterative nature of the sampler, a large number of
#' \code{n_replicates} and \code{burn_in} iterations is needed to correctly
#' approximate the test statistics distributions.
#'
#' For high-dimensional responses, and depending on the initialization, the
#' sampler may not scale well to generate tens of thousands of replicates
#' because of an intermediate rejection sampling step.
#'
#' @param A list of matrices modeling the quadratic constraints of the
#' selection event
#' @param initial initialization sample. This sample must belong to the
#' acceptance region given by \code{A}. In practice, this parameter is set
#' to the outcome of the original dataset.
#' @param n_replicates total number of replicates to be generated
#' @param mu mean of the outcome
#' @param sigma standard deviation of the outcome
#' @param n_iter maximum number of rejections for the parameter \eqn{\lambda}
#' in a single iteration
#' @param burn_in number of burn-in iterations
#'
#' @return a matrix with \code{n_replicates} columns where each column
#' contains a sample within the acceptance region
#'
#' @references Berbee, H. C. P., Boender, C. G. E., Rinnooy Ran, A. H. G.,
#' Scheffer, C. L., Smith, R. L., & Telgen, J. (1987). Hit-and-run algorithms
#' for the identification of non-redundant linear inequalities. Mathematical
#' Programming, 37(2), 184–207.
#'
#' @references Belisle, C. J. P., Romeijn, H. E., & Smith, R. L. (2016).
#' HIT-AND-RUN ALGORITHMS FOR GENERATING MULTIVARIATE DISTRIBUTIONS,
#' 18(2), 255–266.
#'
#' @examples
#' n <- 30
#' p <- 20
#' K <- replicate(5, matrix(rnorm(n*p), nrow = n, ncol = p), simplify = FALSE)
#' K <-  sapply(K, function(X) return(X %*% t(X) / dim(X)[2]), simplify = FALSE)
#' Y <- rnorm(n)
#' L <- Y %*% t(Y)
#' selection <- FOHSIC(K, L, 2)
#' constraintQ <- forwardQ(K, select = selection)
#' samples <- sampleH(A = constraintQ, initial = Y,
#'                    n_replicates = 50, burn_in = 20)
#' @export
sampleH <- function(A, initial, n_replicates, mu = 0.0, sigma = 1.0, n_iter = 1.0e+5, burn_in = 1.0e+3) {
    .Call(`_kernelPSI_sampleH`, A, initial, n_replicates, mu, sigma, n_iter, burn_in)
}