#' Clustering by connected components of ellipsoids
#'
#' \code{cluster.assign.torus} returns clustering assignment for data
#' given \code{icp.torus} objects, which can be constructed with
#' \code{icp.torus}.
#'
#' @param icp.object an object must be an \code{icp.torus} object, which contains
#' all values to compute the conformity score constructed with \code{icp.torus},
#' or a \code{hyperparam.torus} object which is generated by \code{hyperparam.torus}.
#' @param data n x d matrix of toroidal data on \eqn{[0, 2\pi)^d}
#' or \eqn{[-\pi, \pi)^d}.
#' If \code{data = NULL}, then data within the \code{icp.object} is used.
#' @param level a scalar in \eqn{[0,1]}. If argument \code{icp.object} is an \code{icp.torus} object,
#' the default value for \code{level} is 0.1. If argument \code{icp.object} is
#' a \code{hyperparam.torus} object and \code{level = NULL}, then \code{level}
#' is set as the optimal level \code{hyperparam.torus$alphahat}.
#' @return clustering assignment for data, given \code{icp.torus} objects
#' \describe{
#' \item{\code{cluster.id.by.log.density}}{cluster assignment result based on approximate log-density.}
#' \item{\code{cluster.id.by.posterior}}{cluster assignment result based on the posterior probability.}
#' \item{\code{cluster.id.outlier}}{cluster assignment result which regards data not included in conformal prediction set
#' as outliers.}
#' \item{\code{cluster.id.by.Mah.dist}}{cluster assignment result based on Mahalanobis distance.}
#' \item{\code{level}}{used level which determines the size of clusters(conformal prediction set).}
#' \item{\code{data}}{input data which are assigned to each cluster.}
#' \item{\code{icp.torus}}{\code{icp.torus} object which is used for cluster assignment.}
#' }
#' @export
#' @references Jung, S., Park, K., & Kim, B. (2021). Clustering on the torus by conformal prediction. \emph{The Annals of Applied Statistics}, 15(4), 1583-1603.
#'
#' Gilitschenski, I., & Hanebeck, U. D. (2012, July). A robust computational test for overlap of two arbitrary-dimensional ellipsoids in fault-detection of kalman filters. In \emph{2012 15th International Conference on Information Fusion} (pp. 396-401). IEEE.
#' @seealso \code{\link[ClusTorus]{icp.torus}}, \code{\link[ClusTorus]{hyperparam.torus}}
#' @examples
#' data <- toydata1[, 1:2]
#' icp.torus <- icp.torus(data, model = "kmeans",
#' kmeansfitmethod = "general",
#' J = 4, concentration = 25)
#' level <- 0.1
#'
#' cluster.assign.torus(icp.torus, level = level)
cluster.assign.torus <- function(icp.object, data = NULL, level = NULL){
# clustering by connected components of ellipses
#
# return clustering assignment for data, given icp.torus objects.
# clustering assignment is given by
# 1) max_j phatj (phatj = pi_j f_j(x)) for max-mixture (not implemented yet)
# 2) max_k sum_{j in cluster k} phatj for max-mixture (not implemented yet)
# 3) max_j ehatj for ellipse-approx
# 4) max_k sum_{j in cluster k} ehatj for ellipse-approx
# two options
# one: every point is assigned to clusters 1:K by either (1)-(4) above
# two: outliers are assigned to cluster K+1.
if (is.null(icp.object)) {
stop("invalid input: icp.object must be input.")
}
if (sum(class(icp.object) == c("icp.torus", "hyperparam.torus")) == 0) {
stop("invalid input: argument icp.object must be an icp.torus or hyperparam.torus object.")}
if (is(icp.object, "hyperparam.torus")) {
icp.torus <- icp.object$icp.torus
if (is.null(level)){
level <- icp.object$alphahat
}
} else {
icp.torus <- icp.object
if (is.null(level)){
level <- 0.1 # default
}
}
if (!is.numeric(level)){
level <- 0.1
warning("invalid level: level = 0.1 is used.")
}
if (is.null(data)){
data <- icp.torus$data
}else{
if (ncol(data) != icp.torus$d) {
stop("dimension mismatch: dimension of data and icp.torus are not the same.")}
data <- on.torus(data)
}
n2 <- icp.torus$n2
ialpha <- ifelse((n2 + 1) * level < 1, 1, floor((n2 + 1) * level))
cluster.obj <- list()
# For kmeans to kspheres --------------------------------------------------
if(icp.torus$model == "kde"){
stop("model kde does not support clustering.")
} else {
t <- icp.torus$score_ellipse[ialpha]
cluster.obj <- conn.comp.ellipse(icp.torus$ellipsefit, t)
K <- cluster.obj$ncluster
ehatj <- ehat.eval(data, icp.torus$ellipsefit)
ehatj[,cluster.obj$componentid == 0] <- -Inf
# maxj.id <- apply(ehatj, 1, which.max)
maxj.id <- max.col(ehatj, ties.method = "first")
cluster.id1 <- cluster.obj$componentid[maxj.id]
cluster.obj$cluster.id.log.density <- cluster.id1
partsum <- matrix(0, nrow = nrow(data), ncol = K)
for(k in 1:K){
ifelse(sum( cluster.obj$componentid == k) > 1,
partsum[,k] <- rowSums(ehatj[,cluster.obj$componentid == k]),
partsum[,k] <- ehatj[,cluster.obj$componentid == k])
}
# cluster.obj$mixture$cluster.id.by.partialsum <- apply(partsum, 1, which.max)
cluster.obj$cluster.id.by.posterior <- max.col(partsum, ties.method = "first")
# ehat <- apply(ehatj,1,max)
ehat <- do.call(pmax, as.data.frame(ehatj))
cluster.id1[!(ehat >= icp.torus$score_ellipse[ialpha])] <- K+1
cluster.obj$cluster.id.outlier <- cluster.id1
# Use mahalanobis distance
mah <- mah.dist2.eval(data, icp.torus$ellipsefit)
mah[,cluster.obj$componentid == 0] <- Inf
# maxj.id <- apply(mah, 1, which.min)
maxj.id <- max.col(-mah, ties.method = "first")
cluster.id1 <- cluster.obj$componentid[maxj.id]
cluster.obj$cluster.id.by.Mah.dist <- cluster.id1
}
cluster.obj$level <- level
cluster.obj$data <- data
cluster.obj$icp.torus <- icp.torus
return(structure(cluster.obj, class = "cluster.obj"))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.