#' Selecting optimal number of mixture components based on various criteria
#'
#' \code{hyperparam.J} evaluates the model selection criterion for each \code{icp.torus} object
#' and selects the optimal number of mixture components based on the evaluated criterion.
#'
#' @param icp.torus.objects a list whose elements are \code{icp.torus} objects, generated by
#' \code{icp.torus}.
#' @param option a string, one of "risk", "AIC", or "BIC", which determines the criterion
#' for model selection: "risk" is based on the negative log-likelihood, "AIC" on the
#' Akaike information criterion, and "BIC" on the Bayesian information criterion (see the
#' Details section below). The default is "risk".
#' @return returns a \code{hyperparam.J} object, which contains a \code{data.frame} of the
#' evaluated criterion for each number of components, the optimal number of components,
#' and the corresponding \code{icp.torus} object.
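#' @details Write \eqn{\hat{\ell}_J}{l_J} for the maximized log-likelihood of the model with
#' \eqn{J} components and \eqn{p_J} for its number of free parameters. Roughly speaking, the
#' criteria evaluated here are
#' \deqn{AIC_J = -2\hat{\ell}_J + 2 p_J, \quad BIC_J = -2\hat{\ell}_J + p_J \log(n_2),}{AIC_J = -2 l_J + 2 p_J,  BIC_J = -2 l_J + p_J log(n2),}
#' where \eqn{n_2}{n2} is the size of the second split of the data, while "risk" is based on
#' the (approximate) negative log-likelihood of the second-split data under the fitted model,
#' without a complexity penalty. The number of components minimizing the chosen criterion is
#' selected.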
#' @export
#' @seealso \code{\link[ClusTorus]{icp.torus}}, \code{\link[ClusTorus]{hyperparam.torus}},
#' \code{\link[ClusTorus]{hyperparam.alpha}}
#' @references Jung, S., Park, K., & Kim, B. (2021). Clustering on the torus by conformal prediction. \emph{The Annals of Applied Statistics}, 15(4), 1583-1603.
#'
#' Akaike, H. (1974). A new look at the statistical model identification. \emph{IEEE Transactions on Automatic Control}, 19(6), 716-723.
#'
#' Schwarz, G. (1978). Estimating the dimension of a model. \emph{The Annals of Statistics}, 6(2), 461-464.
#' @examples
#' \donttest{
#' data <- toydata1[,1:2]
#' n <- nrow(data)
#' split.id <- rep(2,n)
#' split.id[ sample(n,floor(n/2)) ] <- 1
#'
#' Jvec <- 4:20
#' icp.torus.objects <- icp.torus(data, split.id = split.id, model = "kmeans", J = Jvec)
#'
#' hyperparam.J(icp.torus.objects, option = "AIC")}
hyperparam.J <- function(icp.torus.objects, option = c("risk", "AIC", "BIC")){
  if (is.null(icp.torus.objects)) {stop("icp.torus.objects must be input.")}
  if (!is.list(icp.torus.objects)) {stop("icp.torus.objects must be a list.")}
  icp.torus.objects[sapply(icp.torus.objects, is.null)] <- NULL # remove all NULL elements
  n.icp.torus <- length(icp.torus.objects)
  for (i in seq_len(n.icp.torus)){
    if (!inherits(icp.torus.objects[[i]], "icp.torus")){
      stop("invalid input: the elements of icp.torus.objects must be icp.torus objects.")
    }
  }

  data <- as.matrix(icp.torus.objects[[1]]$data)
  option <- match.arg(option) # default is "risk"
  split.id <- icp.torus.objects[[1]]$split.id
  d <- ncol(data)
  n2 <- icp.torus.objects[[1]]$n2

  if (icp.torus.objects[[1]]$model == "mixture") {
    model <- "mixture"
    mixturefitmethod <- icp.torus.objects[[1]]$fittingmethod
  } else if (icp.torus.objects[[1]]$model == "kmeans") {
    model <- "kmeans"
    kmeansfitmethod <- icp.torus.objects[[1]]$fittingmethod
  } else {stop("method kde is not supported.")}

  # all icp.torus objects must be fitted with the same model
  for (i in seq_len(n.icp.torus)){
    if (model != icp.torus.objects[[i]]$model) {
      stop("icp.torus objects must share the same model.")
    }
  }
  output <- list()
  IC <- data.frame()
  # complexity penalty per free parameter: 2 for AIC, log(n2) for BIC, none for risk
  penalty <- ifelse(option == "AIC", 2,
                    ifelse(option == "BIC", log(n2), 0))

  # 1. kmeans -----------------------------------------------------
  if (model == "kmeans"){
    # preparing the number of model parameters per component
    if (kmeansfitmethod == "homogeneous-circular"){
      k <- d + 1
    } else if (kmeansfitmethod == "heterogeneous-circular"){
      k <- d + 2
    } else {k <- (d + 1)*(d + 2)/2}

    for (i in seq_len(n.icp.torus)){
      if (is.null(icp.torus.objects[[i]])) {next}
      j <- length(icp.torus.objects[[i]]$ellipsefit$c)

      # approximated 2 * log-likelihood based on the normal approximation
      sum.conformity.scores <- sum(icp.torus.objects[[i]]$score_ellipse) - n2 * d * log(2 * pi)

      # evaluating 2 * log-likelihood for AIC/BIC
      if (option != "risk") {
        sum.conformity.scores <- 2 * icp.torus.objects[[i]]$ellipsefit$loglkhd
        nsingular <- length(icp.torus.objects[[i]]$ellipsefit$singular)
      }

      # evaluate risk/AIC/BIC
      # the nsingular term corrects the conformity score for singular matrices
      criterion <- - sum.conformity.scores + (k * j - 1) * penalty +
        ifelse(option != "risk", nsingular * ((log(1e+6^(d - 2))) + d * log((2*pi))), 0)
      IC <- rbind(IC, data.frame(J = j, criterion = criterion))
      cat(".")
    }
    cat("\n")
    IC.index <- which.min(IC$criterion)
    Jhat <- IC[IC.index, 1]
  }
  # 2. mixture ----------------------------------------------------
  else if (model == "mixture"){
    # preparing the number of model parameters per component
    if (mixturefitmethod == "circular"){
      k <- d + 2
    } else if (mixturefitmethod == "axis-aligned"){
      k <- 2 * d + 1
    } else if (mixturefitmethod == "general"){
      k <- (d + 1)*(d + 2)/2
    } else stop("Bayesian is not implemented yet.")

    for (i in seq_len(n.icp.torus)){
      if (is.null(icp.torus.objects[[i]])) {next}
      j <- length(icp.torus.objects[[i]]$ellipsefit$c)

      # log-likelihood for the mixture model
      sum.conformity.scores <- sum(log(icp.torus.objects[[i]]$score))

      # evaluating the maximized log-likelihood for AIC/BIC
      if (option != "risk") {
        loglkhd.seq <- icp.torus.objects[[i]]$fit$loglkhd.seq
        sum.conformity.scores <- loglkhd.seq[length(loglkhd.seq)]
      }

      # evaluate risk/AIC/BIC
      criterion <- - 2 * sum.conformity.scores + (k * j - 1) * penalty
      IC <- rbind(IC, data.frame(J = j, criterion = criterion))
      cat(".")
    }
    cat("\n")
    IC.index <- which.min(IC$criterion)
    Jhat <- IC[IC.index, 1]
  }

  output$criterion <- option
  output$IC.results <- IC
  output$Jhat <- Jhat
  output$icp.torus <- icp.torus.objects[[IC.index]]
  return(structure(output, class = "hyperparam.J"))
}
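
# A minimal usage sketch (not run automatically; wrapped in `if (FALSE)` so that sourcing
# this file has no side effects). It assumes `icp.torus.objects` has been constructed as in
# the roxygen example above, and shows how the returned hyperparam.J object can be inspected.
if (FALSE) {
  out <- hyperparam.J(icp.torus.objects, option = "BIC")
  out$Jhat         # selected number of mixture components
  out$IC.results   # data.frame of J versus the evaluated criterion value
  out$icp.torus    # the icp.torus object corresponding to J = out$Jhat
}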