###############################################################################
# Functions for L0 parameter estimation using regularity #
###############################################################################
# Reference
# ---------
# * Golovkine S., Klutchnikoff N., Patilea V. (2020) - Learning the smoothness
# of noisy curves with applications to online curves denoising.
# * Golovkine S., Klutchnikoff N., Patilea V. (2021) - Adaptive estimation of
# irregular mean and covariance functions.
#' Perform an estimation of \eqn{L_0}
#'
#' This function performs an estimation of \eqn{H_0} used for the estimation of
#' the bandwidth for a univariate kernel regression estimator defined over
#' continuous domains data using the method of \cite{add ref}.
#'
#' @importFrom magrittr %>%
#'
#' @family estimate \eqn{L_0}
#'
#' @param data A list, where each element represents a curve. Each curve have to
#' be defined as a list with two entries:
#' \itemize{
#' \item \strong{$t} The sampling points
#' \item \strong{$x} The observed points.
#' }
#' @param t0 Numeric, the sampling point at which we estimate \eqn{H0}. We will
#' consider the \eqn{8k0 - 7} nearest points of \eqn{t_0} for the estimation of
#' \eqn{H_0} when \eqn{\sigma} is unknown.
#' @param H0 Numeric, an estimation of \eqn{H_0}
#' @param k0 Numeric, the number of neighbors of \eqn{t_0} to consider. Should
#' be set as \eqn{k0 = M * exp(-log(log(M))^2)}.
#' @param sigma Numeric, true value of sigma. Can be NULL.
#' @param density Logical, do the sampling points have a uniform distribution?
#' (default is FALSE)
#'
#' @return Numeric, an estimation of L0.
#' @export
#' @examples
#' X <- generate_fractional_brownian(N = 1000, M = 300, H = 0.5, sigma = 0.05)
#' estimate_L0(X, t0 = 0.5, H0 = 0.5, k0 = 2)
estimate_L0 <- function(data, t0 = 0, H0 = 0, k0 = 2, sigma = NULL, density = FALSE) {
if(H0 > 1) H0 = 1
# Estimate mu
mu_hat <- data %>%
purrr::map_int(~ length(.x$t)) %>%
mean()
# Estimate L0
theta <- function(v, k, idx) (v[idx + 2 * k - 1] - v[idx + k])**2
eta <- function(v, k, idx, H) (v[idx + 2 * k - 1] - v[idx + k])**(2 * H)
nume <- 1
deno <- 1
if (!density) { # Case where the density is not known
if (is.null(sigma)) { # Subcase where sigma is not known
idxs <- data %>%
purrr::map_dbl(~ min(order(abs(.x$t - t0))[seq_len(4 * k0 - 2)]))
a <- data %>%
purrr::map2_dbl(idxs, ~ theta(.x$x, k = 2 * k0 - 1, idx = .y)) %>%
mean()
b <- data %>%
purrr::map2_dbl(idxs, ~ theta(.x$x, k = k0, idx = .y)) %>%
mean()
c <- data %>%
purrr::map2_dbl(idxs, ~ eta(.x$t, k = 2 * k0 - 1, idx = .y, H = H0)) %>%
mean()
d <- data %>%
purrr::map2_dbl(idxs, ~ eta(.x$t, k = k0, idx = .y, H = H0)) %>%
mean()
if ((a - b > 0) & (c - d > 0)) {
nume <- a - b
deno <- c - d
}
} else { # Subcase where sigma is known
idxs <- data %>%
purrr::map_dbl(~ min(order(abs(.x$t - t0))[seq_len(2 * k0)]))
a <- data %>%
purrr::map2_dbl(idxs, ~ theta(.x$x, k = k0, idx = .y)) %>%
mean()
b <- data %>%
purrr::map2_dbl(idxs, ~ eta(.x$t, k = k0, idx = .y, H = H0)) %>%
mean()
if ((a - 2 * sigma**2 > 0) & b > 0) {
nume <- a - 2 * sigma**2
deno <- b
}
}
} else { # Case where the density is known (only the uniform case)
if (is.null(sigma)) { # Subcase where sigma is not known
idxs <- data %>%
purrr::map_dbl(~ min(order(abs(.x$t - t0))[seq_len(4 * k0 - 2)]))
a <- data %>%
purrr::map2_dbl(idxs, ~ theta(.x$x, k = 2 * k0 - 1, idx = .y)) %>%
mean()
b <- data %>%
purrr::map2_dbl(idxs, ~ theta(.x$x, k = k0, idx = .y)) %>%
mean()
if (a - b > 0) nume <- a - b
deno <- (2**(2 * H0) - 1) * ((k0 - 1) / (mu_hat + 1))**(2 * H0)
} else { # Subcase where sigma is known
idxs <- data %>%
purrr::map_dbl(~ min(order(abs(.x$t - t0))[seq_len(2 * k0)]))
a <- data %>%
purrr::map2_dbl(idxs, ~ theta(.x$x, k = k0, idx = .y)) %>%
mean()
if (a - 2 * sigma**2 > 0) nume <- a - 2 * sigma**2
deno <- ((k0 - 1) / (mu_hat + 1))**(2 * H0)
}
}
(nume / deno)**0.5
}
#' Perform the estimation of \eqn{L_0} given a list of \eqn{t_0}
#'
#' This function performs an estimation of \eqn{H_0} used for the estimation of
#' the bandwidth for a univariate kernel regression estimator defined over
#' continuous domains data using the method of \cite{add ref}.
#'
#' @importFrom magrittr %>%
#'
#' @family estimate \eqn{L_0}
#'
#' @param data A list, where each element represents a curve. Each curve have to
#' be defined as a list with two entries:
#' \itemize{
#' \item \strong{$t} The sampling points
#' \item \strong{$x} The observed points.
#' }
#' @param t0_list A vetor of numerics, the sampling points at which we estimate
#' \eqn{H0}. We will consider the \eqn{8k0 - 7} nearest points of \eqn{t_0} for
#' the estimation of \eqn{H_0} when \eqn{\sigma} is unknown.
#' @param H0_list A vector of numerics, an estimation of \eqn{H_0} at every
#' \eqn{t_0} given in \code{t0_list}.
#' @param k0_list A vector of numerics, the number of neighbors of \eqn{t_0} to
#' consider. Should be set as \eqn{k0 = M * exp(-log(log(M))^2)}.
#' @param sigma Numeric, true value of sigma. Can be NULL.
#' @param density Logical, do the sampling points have a uniform distribution?
#' (default is FALSE)
#'
#' @return A vector of numerics, an estimation of \eqn{L_0} at each \eqn{t_0}.
#' @export
#' @examples
#' X <- generate_fractional_brownian(N = 1000, M = 300, H = 0.5, sigma = 0.05)
#' estimate_L0_list(X, t0_list = 0.5, H0_list = 0.5)
#'
#' df_piece <- generate_piecewise_fractional_brownian(N = 1000, M = 300,
#' H = c(0.2, 0.5, 0.8),
#' sigma = 0.05)
#' estimate_L0_list(df_piece, t0_list = c(0.15, 0.5, 0.85),
#' H0_list = c(0.2, 0.5, 0.8), k0 = 6)
estimate_L0_list <- function(data, t0_list, H0_list,
k0_list = 2, sigma = NULL, density = FALSE) {
if(!inherits(data, 'list')) data <- checkData(data)
if (length(t0_list) != length(H0_list)) {
stop("t0_list and H0_list must have the same length")
}
purrr::pmap_dbl(list(t0_list, H0_list, k0_list),
function(t0, H0, k0) estimate_L0(data, t0 = t0, H0 = H0,
k0 = k0, sigma = sigma,
density = density))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.