#' Iterative Sparse Estimation
#'
#' Performs iterative sparse estimation of many normal means either using
#' the soft threshold estimator or the hard-threshold estimator.
#'
#'
#' Given \eqn{Y = (Y1, Y2, \dots, Yn)}, such that:
#' \deqn{Yi ~ N(\theta i, \sigma2), 1 \le i \le n.}
#' \eqn{N(\theta, \sigma2)} denotes the normal distribution with
#' means \eqn{\theta} and variance \eqn{\sigma2}. The means are to
#' be estimated. Variance is either known or to be estimated.
#'
#'
#' Means are either estimated using the soft-threhsold (ST) or the
#' hard-threshold (HT) estimator. ITSES minimize expected square error loss
#' under repeated sampling (risk).
#'
#' Soft-threshold: \eqn{\hat{\theta}(x)= sign(x)(abs(x)-\lambda)(abs(x)>\lambda)}.
#'
#'
#' Hard-threshold: \eqn{\hat{\theta}(x)= x(abs(x)>\lambda)}.
#'
#'
#' @param y A `n` vector of observations.
#' @param method Estimator which will be used. Can be either
#' "ST" (Soft-Threshold estimator) or "HT" (Hard-Threshold estimator).
#' Defaults to "ST".
#' @param m The number of iterative steps. Default is `5`.
#' @param init.lambda Initial threshold to start iterations at. Can be:
#' `numeric`; "median"; "visu" (universal threshold); and "half-visu"
#' (universal threshold divded by 2). Defaults to "median".
#' @param max.length The maximum number of observations `n`. If`n` is above
#' internal calculations will downsample `y` to be of `max.length` size.
#' Defaults to `5e6`.
#' @param minimization.method The risk minimization method which will be used.
#' Can be either "numeric" or "sampling". Default is "numeric".
#' @param debug `logical`. Specify wether or not to print debug code from
#' iterations.
#' @param sd Standard deviation of `y` i.e. noise levels. If `NA` will estiamt
#' e using specified MAD estimator. Default is `NA`.
#' @param sparsity Sparsity of `y` if known. `0 < sparsity < 1`, closer to 1
#' means highly sparse. Is used in noise estimation if sparse noise estimation
#' is used. Default is `NA`.
#' @param sparse.mad `logical`. If `TRUE` use sparsity in MAD noise estimation.
#' Default is `TRUE`.
#' @param remove.zero `logical`. Remove observation that are zero from noise
#' estimatiom. Default is `TRUE`.
#' @param tol A numeric that determines the sensitivity of threshold selection.
#' Default is `1e-8`, iteration will stop if change is less than set `tol`.
#' @param h Parameter for deciding eligibility of sparsity measure.
#' Default is `0.4`, only used with SparseMAD estimator.
#' @param ... parameters passed to risk minmization methods.
#' @param b number of samples to take. Default is `10`.
#' @param k number of thresholds to initially evaluate. Default is `10`.
#' @param max_num_iters maximum number of iterations with Newton's method or
#' spline interpolations (depending on method used). Default is `10`.
#' @return List object with iteration results.
#'
#' @export
itses <- function(y,
method = "ST",
m = 5,
init.lambda = "median",
max.length = 5e6,
minimization.method = "numeric",
debug = FALSE,
sd = NA,
sparsity = NA,
sparse.mad = TRUE,
remove.zero = TRUE,
tol = 1e-8,
h = 0.4,
max.threshold = NULL,
min.threshold = NULL,
...
) {
# Check input
n <- length(y)
if(n < 1) {
stop("Not enough data points")
}
if(debug) print(paste("Starting ITSES. Number of points in data: ", n))
# Estimate noise if not available.
if((!is.numeric(sd)) | (sd <= 0)) {
if(debug) print("Starting sd estimation.")
y.temp <- y # Temp. variable
# If wanted, remove 0s from estimation.
if(remove.zero) {
if(debug) print("Removing zeros frem sd estimation.")
y.temp<- y.temp[y.temp != 0]
}
if(sparse.mad) {
if(debug) print("Using SparseMAD estimator.")
# Estimate noise by building in sparsity assumption.
if(is.na(sparsity)) {
# Use 'SparseMAD' to estimate noise.
if(debug) print("Estimating noise.")
sd <- sparse.mad.estimator(y.temp, h = h, debug = debug)
}else{
if(debug) print("Using given sparsity level.")
# If sparsity is available find sparsity threshold.
t <- get.threshold.for.sparsity(y, sparsity)
# Use suggested 'SparseMAD' with given threshold.
sd <- thresholded.mad.estimator(y.temp, t)
}
}else{
if(debug) print("Using MAD estimator to estimate noise.")
sd <- mad.estimator(y.temp)
}
}
if(debug) print(paste0("Sd:", sd))
# Normalize data for iterative threshold selection.
z <- y/sd
# Set minimum and maximimum bounds for threshold.
if(is.null(min.threshold)) min.threshold <- 0
if(is.null(max.threshold)) max.threshold <- get.max.threshold(method, z)
if(debug) print(paste("Threshold bounds are set to:", min.threshold, max.threshold))
# Get starting threshold.
lambda <- get.init.lambda(z, init.lambda = init.lambda)
lambda <- get.thresholded.lambda(lambda, min.threshold, max.threshold)
# Set start parameters.
if(minimization.method == "sampling") lambda.grid <- NULL
# Initiate objects to store results in.
sds <- sd
iteration_result <- list()
lambdas <- lambda # List storing all threholds.
# Initaitate variable to store last iteration step
last.lambda <- -Inf
# Start iterative estimation.
for(i in 0:(m-1)) {
if(debug) print(paste("Iteration:", i, "Threshold:", lambda))
# Down-sample if data exceeds max length.
z.sample <- z
if(n > max.length) {
if(debug) print("Downsampling")
z.sample <- sample(z, max.length)
}
# Select minimization method.
if(minimization.method == "sampling") {
# Find threshold by using sampling to estimate risk.
iter.results <- iter.sampling(y = z.sample,
init.lambda = lambda,
...,
lambda.grid = lambda.grid,
debug = debug,
method = method,
tol = tol,
min.threshold = min.threshold,
max.threshold = max.threshold
)
lambda.grid <- iter.results$lambdas
}else if(minimization.method == "numeric") {
# Find threshold by directly minimizing risk.
iter.results <- iter.newton(y = z.sample,
init.lambda = lambda,
...,
method = method,
tol = tol,
debug = debug,
min.threshold = min.threshold,
max.threshold = max.threshold )
}else{
stop("Not supported minimization method, only 'sampling' and 'numeric' currently supported.")
}
# Store results.
iteration_result[[i+1]] <- cbind(sd*iter.results$lambdas,sd^2*iter.results$risks)
lambda <- iter.results$lambda
lambdas <- c(lambdas, lambda)
# Check convergence.
# First equality is to chek if Inf == Inf as Inf - Inf is NA ...
if(last.lambda == lambda|abs(last.lambda - lambda)<tol) {
if(debug) print("Breaking iterations, same threshold as previous iteration.")
break()
}
last.lambda <- lambda
}
# Get final mean estimates
if(method == "ST") {
theta <- soft.threshold.estimator(y, sd*lambda)
}else if(method == "HT") {
theta <- hard.threshold.estimator(y, sd*lambda)
}else{
stop("Unsupported estimator selected.")
}
# Storing parameters and results.
result <- list(theta = theta,
lambda = sd*lambda,
sd = sd,
sds = sds,
lambdas = sd*lambdas)
result[["iteration_result"]] <- iteration_result
result[["y"]] <- y
result[["method"]] <- method
result[["m"]] <- m
result[["minimization.method"]] <- minimization.method
# Return results.
if(debug) print(paste0("Final threshold is: ", result$lambda))
result
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.