Nothing
#' Selecting optimal hyperparameters for the conformal prediction set
#'
#' \code{hyperparam.torus} selects optimal hyperparameters for constructing the conformal prediction
#' set, based on the type of postulated model and the criterion.
#'
#' @param icp.torus.objects list whose elements are icp.torus objects, generated by
#' \code{icp.torus}
#' @param option A string. One of "elbow", "risk", "AIC", or "BIC", which determines the
#' criterion for the model selection. "risk" is based on the negative log-likelihood, "AIC"
#' for the Akaike Information Criterion, and "BIC" for the Bayesian Information Criterion.
#' "elbow" is based on minimizing the criterion used in Jung et. al.(2021). Default is
#' \code{option = "elbow"} for 2-dimensional cases and \code{option = "risk"} for d(>2)-dimensional cases.
#' @param alphavec either a scalar or a vector, or even \code{NULL} for the levels.
#' Default value is \code{NULL}. If \code{NULL}, then \code{alphavec} is
#' automatically generated as a sequence from 0 to \code{alpha.lim}.
#' @param alpha.lim a positive number lower than 1. Default value is \code{NULL}.
#' If \code{NULL}, then \code{alpha.vec} is is 0.5 for \code{option = "elbow"}, and
#' 0.15 for options c("risk", "AIC", or "BIC").
#' @param eval.point N x N numeric matrix on \eqn{[0, 2\pi)^2}.
#' Default input is \code{grid.torus}.
#' @return returns a list object which contains \code{data.frame} objects for
#' the evaluated criterion corresponding to each hyperparameter,
#' selected hyperparameters based on the designated criterion, and
#' an \code{icp.torus} object based the selected hyperparameters.
#' @export
#' @references Jung, S., Park, K., & Kim, B. (2021). Clustering on the torus by conformal prediction. \emph{The Annals of Applied Statistics}, 15(4), 1583-1603.
#'
#' Akaike, H. (1974). A new look at the statistical model identification. \emph{IEEE transactions on automatic control}, 19(6), 716-723.
#'
#' Schwarz, G. (1978). Estimating the dimension of a model. \emph{The annals of statistics}, 461-464.
#' @examples
#' \donttest{
#' data <- toydata2[, 1:2]
#' n <- nrow(data)
#' split.id <- rep(2, n)
#' split.id[sample(n, floor(n/2))] <- 1
#' Jvec <- 3:35
#' icp.torus.objects <- icp.torus(data, split.id = split.id, model = "kmeans",
#' kmeansfitmethod = "ge", init = "h",
#' J = Jvec, verbose = TRUE)
#' hyperparam.torus(icp.torus.objects, option = "risk")
#' }
hyperparam.torus <- function(icp.torus.objects,
option = NULL, #c("elbow", "risk", "AIC", "BIC"),
alphavec = NULL,
alpha.lim = NULL,
eval.point = NULL){
if (is.null(icp.torus.objects)) {stop("icp.torus.objects must be input.")}
if (!is.list(icp.torus.objects)) {stop("icp.torus.objects must be a list.")}
icp.torus.objects[sapply(icp.torus.objects, is.null)] <- NULL # remove all null elements
n.icp.torus <- length(icp.torus.objects)
for(i in 1:n.icp.torus){
if(class(icp.torus.objects[[i]]) != "icp.torus"){
stop("invalid input: the elements of icp.torus.objects must be icp.torus objects.")
}
}
model <- icp.torus.objects[[1]]$model
fitmethod <- ifelse(model != "kde", icp.torus.objects[[1]]$fittingmethod, " ")
data <- as.matrix(icp.torus.objects[[1]]$data)
d <- icp.torus.objects[[1]]$d
n2 <- icp.torus.objects[[1]]$n2
# if d <= 2, default is elbow based criterion. If d > 2, default is risk.
if (is.null(option)) {
option <- ifelse(d <= 2, "elbow", "risk")
}
if (option != "elbow" && model == "kde"){
warning("Parameters for kde should be chosen by option elbow. Switching to option elbow...")
option = "elbow"
}
if (model != "kmeans" && d > 2) {
stop(paste("The model", model," is not supported for dimension d >= 3."))
}
if (is.null(alpha.lim)) { alpha.lim <- ifelse(option=="elbow",0.5,0.15)}
if (is.null(alphavec) && alpha.lim > 1) {stop("alpha.lim must be less than 1.")}
output <- list()
output$model <- c(model, fitmethod)
output$option <- option
# initializing alphavec
if (is.null(alphavec)) {alphavec <- 1:floor(n2 * alpha.lim) / n2}
# criterion based on elbow -----------------------------------
if (option == "elbow"){
if (d > 3) {warning("Option `elbow` takes long for high dimensional case (d >= 3).", immediate. = TRUE)}
# generating grid points if eval.point == NULL : sparse when d is large.
grid.size <- ifelse(d == 2, 100, floor(10^(6/d)))
if (is.null(eval.point)) { eval.point <- grid.torus(d = d, grid.size = grid.size)}
# 1. kmeans -----------------------------------------------------
if (model == "kmeans"){
N <- length(alphavec)
Mvec <- vector("numeric", length = N)
out <- data.frame()
for (j in 1:n.icp.torus){
inclusion.test <- icp.torus.eval(icp.torus.objects[[j]], level = alphavec, eval.point = eval.point)
Mvec <- colSums(inclusion.test$Chat_kmeans)/nrow(eval.point)
out <- rbind(out, data.frame(id = j,J = length(icp.torus.objects[[j]]$ellipsefit$c), alpha = alphavec, mu = Mvec, criterion = alphavec + Mvec))
cat(".")
}
cat("\n")
out.index <- which.min(out$criterion)
output$results <- out[,2:5]
output$icp.torus <- icp.torus.objects[[out[out.index, 1]]]
output$Jhat <- unlist(out[out.index, 2])
output$alphahat <- unlist(out[out.index, 3])
#output$optim$icp.torus <- icp.torus.objects[[out[out.index, 1]]]
#output$optim$hyperparam <- unlist(out[out.index, 2:3])
return(structure(output, class = "hyperparam.torus"))
# 2. mixture ----------------------------------------------------------------
} else if (model == "mixture") {
N <- length(alphavec)
Mvec <- vector("numeric", length = N)
out <- data.frame()
for (j in 1:n.icp.torus){
inclusion.test <- icp.torus.eval(icp.torus.objects[[j]], level = alphavec, eval.point = eval.point)
Mvec <- colSums(inclusion.test$Chat_mix)/nrow(eval.point)
out <- rbind(out, data.frame(id = j, J = length(icp.torus.objects[[j]]$ellipsefit$c), alpha = alphavec, mu = Mvec, criterion = alphavec + Mvec))
cat(".")
}
cat("\n")
out.index <- which.min(out$criterion)
output$results <- out[,2:5]
output$icp.torus <- icp.torus.objects[[out[out.index, 1]]]
output$Jhat <- unlist(out[out.index, 2])
output$alphahat <- unlist(out[out.index, 3])
#output$optim$icp.torus <- icp.torus.objects[[out[out.index, 1]]]
#output$optim$hyperparam <- unlist(out[out.index, 2:3])
return(structure(output, class = "hyperparam.torus"))
}
# 3. kde --------------------------------------------------
else {
N <- length(alphavec)
Mvec <- vector("numeric", length = N)
out <- data.frame()
for (k in 1:n.icp.torus){
inclusion.test <- icp.torus.eval(icp.torus.objects[[k]], level = alphavec, eval.point = eval.point)
Mvec <- colSums(inclusion.test$Chat_kde)/nrow(eval.point)
out <- rbind(out, data.frame(id = k, k = icp.torus.objects[[k]]$concentration, alpha = alphavec, mu = Mvec, criterion = alphavec + Mvec))
cat(".")
}
cat("\n")
out.index <- which.min(out$criterion)
output$results <- out[,2:5]
output$icp.torus <- icp.torus.objects[[out[out.index, 1]]]
output$khat <- unlist(out[out.index, 2])
output$alphahat <- unlist(out[out.index, 3])
return(structure(output, class = "hyperparam.torus"))
}
}
# criterion based on information criteria ----------------------
if (sum(option == c("AIC", "BIC", "risk")) == 1){
results.J <- hyperparam.J(icp.torus.objects = icp.torus.objects,
option = option)
output$IC.results <- results.J$IC.results
Jhat <- results.J$Jhat
icp.torus <- results.J$icp.torus
results.alpha <- hyperparam.alpha(icp.torus, alphavec = alphavec, alpha.lim = alpha.lim)
output$alpha.results <- results.alpha$alpha.results
alphahat <- results.alpha$alphahat
hyperparam <- c(Jhat, alphahat)
names(hyperparam) <- c("J", "alpha")
output$icp.torus <- icp.torus
output$Jhat <- hyperparam[1]
output$alphahat <- hyperparam[2]
return(structure(output, class = "hyperparam.torus"))
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.