R/cluster.assign.torus.R

Defines functions cluster.assign.torus

Documented in cluster.assign.torus

#' Clustering by connected components of ellipsoids
#'
#' \code{cluster.assign.torus} returns clustering assignment for data
#'   given \code{icp.torus} objects, which can be constructed with
#'   \code{icp.torus}.
#'
#' @param icp.object an object must be an \code{icp.torus} object, which contains
#'   all values to compute the conformity score constructed with \code{icp.torus},
#'   or a \code{hyperparam.torus} object which is generated by \code{hyperparam.torus}.
#' @param data n x d matrix of toroidal data on \eqn{[0, 2\pi)^d}
#'    or \eqn{[-\pi, \pi)^d}.
#'    If \code{data = NULL}, then data within the \code{icp.object} is used.
#' @param level a scalar in \eqn{[0,1]}. If argument \code{icp.object} is an \code{icp.torus} object,
#'   the default value for \code{level} is 0.1. If argument \code{icp.object} is
#'   a \code{hyperparam.torus} object and \code{level = NULL}, then \code{level}
#'   is set as the optimal level \code{hyperparam.torus$alphahat}.
#' @return clustering assignment for data, given \code{icp.torus} objects
#' \describe{
#'   \item{\code{cluster.id.by.log.density}}{cluster assignment result based on approximate log-density.}
#'   \item{\code{cluster.id.by.posterior}}{cluster assignment result based on the posterior probability.}
#'   \item{\code{cluster.id.outlier}}{cluster assignment result which regards data not included in conformal prediction set
#'   as outliers.}
#'   \item{\code{cluster.id.by.Mah.dist}}{cluster assignment result based on Mahalanobis distance.}
#'   \item{\code{level}}{used level which determines the size of clusters(conformal prediction set).}
#'   \item{\code{data}}{input data which are assigned to each cluster.}
#'   \item{\code{icp.torus}}{\code{icp.torus} object which is used for cluster assignment.}
#' }
#' @export
#' @references Jung, S., Park, K., & Kim, B. (2021). Clustering on the torus by conformal prediction. \emph{The Annals of Applied Statistics}, 15(4), 1583-1603.
#'
#'   Gilitschenski, I., & Hanebeck, U. D. (2012, July). A robust computational test for overlap of two arbitrary-dimensional ellipsoids in fault-detection of kalman filters. In \emph{2012 15th International Conference on Information Fusion} (pp. 396-401). IEEE.
#' @seealso \code{\link[ClusTorus]{icp.torus}}, \code{\link[ClusTorus]{hyperparam.torus}}
#' @examples
#' data <- toydata1[, 1:2]
#' icp.torus <- icp.torus(data, model = "kmeans",
#'                        kmeansfitmethod = "general",
#'                        J = 4, concentration = 25)
#' level <- 0.1
#'
#' cluster.assign.torus(icp.torus, level = level)
cluster.assign.torus <- function(icp.object, data = NULL, level = NULL){
  # clustering by connected components of ellipses
  #
  # return clustering assignment for data, given icp.torus objects.
  # clustering assignment is given by
  # 1) max_j phatj (phatj = pi_j f_j(x)) for max-mixture (not implemented yet)
  # 2) max_k sum_{j in cluster k} phatj for max-mixture  (not implemented yet)
  # 3) max_j ehatj for ellipse-approx
  # 4) max_k sum_{j in cluster k} ehatj for ellipse-approx
  # two options
  # one: every point is assigned to clusters 1:K by either (1)-(4) above
  # two: outliers are assigned to cluster K+1.

  if (is.null(icp.object)) {
    stop("invalid input: icp.object must be input.")
  }

  if (sum(class(icp.object) == c("icp.torus", "hyperparam.torus")) == 0) {
    stop("invalid input: argument icp.object must be an icp.torus or hyperparam.torus object.")}



  if (is(icp.object, "hyperparam.torus")) {
    icp.torus <- icp.object$icp.torus
    if (is.null(level)){
      level <- icp.object$alphahat
    }
  } else {
    icp.torus <- icp.object
    if (is.null(level)){
      level <- 0.1 # default
    }
  }

  if (!is.numeric(level)){
    level <- 0.1
    warning("invalid level: level = 0.1 is used.")
  }

  if (is.null(data)){
    data <- icp.torus$data
  }else{
    if (ncol(data) != icp.torus$d) {
      stop("dimension mismatch: dimension of data and icp.torus are not the same.")}
    data <- on.torus(data)
  }

  n2 <- icp.torus$n2
  ialpha <- ifelse((n2 + 1) * level < 1, 1, floor((n2 + 1) * level))

  cluster.obj <- list()
  # For kmeans to kspheres --------------------------------------------------

  if(icp.torus$model == "kde"){
    stop("model kde does not support clustering.")
  } else {
    t <- icp.torus$score_ellipse[ialpha]
    cluster.obj <- conn.comp.ellipse(icp.torus$ellipsefit, t)
    K <- cluster.obj$ncluster
    ehatj <- ehat.eval(data, icp.torus$ellipsefit)

    ehatj[,cluster.obj$componentid == 0] <- -Inf

    # maxj.id <- apply(ehatj, 1, which.max)
    maxj.id <- max.col(ehatj, ties.method = "first")
    cluster.id1 <- cluster.obj$componentid[maxj.id]
    cluster.obj$cluster.id.log.density <- cluster.id1

    partsum <- matrix(0, nrow = nrow(data), ncol = K)
    for(k in 1:K){
      ifelse(sum( cluster.obj$componentid == k) > 1,
             partsum[,k] <- rowSums(ehatj[,cluster.obj$componentid == k]),
             partsum[,k] <- ehatj[,cluster.obj$componentid == k])
    }
    # cluster.obj$mixture$cluster.id.by.partialsum <- apply(partsum, 1, which.max)
    cluster.obj$cluster.id.by.posterior <- max.col(partsum, ties.method = "first")

    # ehat <- apply(ehatj,1,max)
    ehat <- do.call(pmax, as.data.frame(ehatj))

    cluster.id1[!(ehat >= icp.torus$score_ellipse[ialpha])] <- K+1
    cluster.obj$cluster.id.outlier <- cluster.id1

    # Use mahalanobis distance

    mah <- mah.dist2.eval(data, icp.torus$ellipsefit)
    mah[,cluster.obj$componentid == 0] <- Inf

    # maxj.id <- apply(mah, 1, which.min)
    maxj.id <- max.col(-mah, ties.method = "first")
    cluster.id1 <- cluster.obj$componentid[maxj.id]
    cluster.obj$cluster.id.by.Mah.dist <- cluster.id1

  }
  cluster.obj$level <- level
  cluster.obj$data <- data
  cluster.obj$icp.torus <- icp.torus
  return(structure(cluster.obj, class = "cluster.obj"))
}
sungkyujung/ClusTorus documentation built on April 30, 2022, 9:18 p.m.