R/assign.R

Defines functions assignment discretize cluster_qr

Documented in assignment cluster_qr discretize

cluster_qr <- function(vectors) {
  k <- ncol(vectors)
  piv <- qr(t(vectors), LAPACK = TRUE)$pivot
  uv <- svd(t(vectors[piv[1:k], 1:ncol(vectors)]))
  vectors <- abs(vectors %*% (uv$u %*% Conj(uv$v)))
  return(max.col(vectors))
}

discretize <-
  function(vectors,
           max_svd_restarts = 30,
           n_iter_max = 20) {
    eps <- .Machine$double.eps
    n_samples <- nrow(vectors)
    n_components <- ncol(vectors)

    # Normalize the eigenvectors to an equal length of a vector of ones.
    # Reorient the eigenvectors to point in the negative direction with respect
    # to the first element.  This may have to do with constraining the
    # eigenvectors to lie in a specific quadrant to make the discretization
    # search easier.
    norm_ones <- sqrt(n_samples)
    for (i in 1:n_components) {
      vectors[, i] <- (vectors[, i] / norm(vectors[, i], "2")) * norm_ones
      if (vectors[1, i] != 0) {
        vectors[, i] <- -1 * vectors[, i] * sign(vectors[1, i])
      }
    }

    # Normalize the rows of the eigenvectors.  Samples should lie on the unit
    # hypersphere centered at the origin.  This transforms the samples in the
    # embedding space to the space of partition matrices.
    vectors <- vectors / sqrt(rowSums(vectors ^ 2))

    svd_restarts <- 0
    has_converged <- FALSE

    # If there is an exception we try to randomize and rerun SVD again
    # do this max_svd_restarts times.
    while ((svd_restarts < max_svd_restarts) &  !has_converged)
    {
      # Initialize first column of rotation matrix with a row of the
      # eigenvectors
      rotation <- matrix(data = 0, n_components, n_components)
      rotation[, 1] <-
        t(vectors[ceiling(stats::runif(1, min = 0, max = n_samples)),])

      # To initialize the rest of the rotation matrix, find the rows
      # of the eigenvectors that are as orthogonal to each other as
      # possible
      c <- rep(0, n_samples)
      for (j in 2:n_components) {
        # Accumulate c to ensure row is as orthogonal as possible to
        # previous picks as well as current one
        c <- c + abs(vectors %*% rotation[1:n_components, j - 1])
        rotation[, j] <- t(vectors[which.min(c),])
      }
      last_objective_value <- 0.0
      n_iter <- 0

      while (!has_converged) {
        n_iter <- n_iter + 1

        t_discrete <- vectors %*% rotation

        labels <- max.col(t_discrete)
        # TODO: Compressed Sparse Column matrix
        vectors_discrete <-
          matrix(data = 0, n_samples, n_components)
        vectors_discrete <- t(vectors_discrete)
        vectors_discrete[(0:(n_samples - 1)) * n_components + labels] <-
          1
        vectors_discrete <- t(vectors_discrete)

        t_svd <- t(vectors_discrete) %*% vectors

        usv <- svd(t_svd)

        ncut_value <- 2.0 * (n_samples - sum(usv$d))
        if ((abs(ncut_value - last_objective_value) < eps) |
            (n_iter > n_iter_max)) {
          has_converged <- TRUE
        }
        else{
          # otherwise calculate rotation and continue
          last_objective_value <- ncut_value
          rotation <- t(usv$v) %*% t(usv$u)
        }
      }
    }
    if (!has_converged) {
      print("SVD did not converge")
    }
    return(labels)
  }

assignment <- function(maps,
                       assign_labels = "kmeans",
                       centers = 8,
                       nstart = 10) {
  if (assign_labels == "kmeans") {
    labels <- stats::kmeans(maps,
                            centers = centers,
                            nstart = nstart)$cluster
  } else if (assign_labels == "cluster_qr") {
    labels <- cluster_qr(maps)
  } else if (assign_labels == "discretize") {
    labels <- discretize(maps)
  }
  return(labels)
}
arthans/SpectralClustering documentation built on Dec. 19, 2021, 4:41 a.m.