cluster_qr <- function(vectors) {
k <- ncol(vectors)
piv <- qr(t(vectors), LAPACK = TRUE)$pivot
uv <- svd(t(vectors[piv[1:k], 1:ncol(vectors)]))
vectors <- abs(vectors %*% (uv$u %*% Conj(uv$v)))
return(max.col(vectors))
}
discretize <-
function(vectors,
max_svd_restarts = 30,
n_iter_max = 20) {
eps <- .Machine$double.eps
n_samples <- nrow(vectors)
n_components <- ncol(vectors)
# Normalize the eigenvectors to an equal length of a vector of ones.
# Reorient the eigenvectors to point in the negative direction with respect
# to the first element. This may have to do with constraining the
# eigenvectors to lie in a specific quadrant to make the discretization
# search easier.
norm_ones <- sqrt(n_samples)
for (i in 1:n_components) {
vectors[, i] <- (vectors[, i] / norm(vectors[, i], "2")) * norm_ones
if (vectors[1, i] != 0) {
vectors[, i] <- -1 * vectors[, i] * sign(vectors[1, i])
}
}
# Normalize the rows of the eigenvectors. Samples should lie on the unit
# hypersphere centered at the origin. This transforms the samples in the
# embedding space to the space of partition matrices.
vectors <- vectors / sqrt(rowSums(vectors ^ 2))
svd_restarts <- 0
has_converged <- FALSE
# If there is an exception we try to randomize and rerun SVD again
# do this max_svd_restarts times.
while ((svd_restarts < max_svd_restarts) & !has_converged)
{
# Initialize first column of rotation matrix with a row of the
# eigenvectors
rotation <- matrix(data = 0, n_components, n_components)
rotation[, 1] <-
t(vectors[ceiling(stats::runif(1, min = 0, max = n_samples)),])
# To initialize the rest of the rotation matrix, find the rows
# of the eigenvectors that are as orthogonal to each other as
# possible
c <- rep(0, n_samples)
for (j in 2:n_components) {
# Accumulate c to ensure row is as orthogonal as possible to
# previous picks as well as current one
c <- c + abs(vectors %*% rotation[1:n_components, j - 1])
rotation[, j] <- t(vectors[which.min(c),])
}
last_objective_value <- 0.0
n_iter <- 0
while (!has_converged) {
n_iter <- n_iter + 1
t_discrete <- vectors %*% rotation
labels <- max.col(t_discrete)
# TODO: Compressed Sparse Column matrix
vectors_discrete <-
matrix(data = 0, n_samples, n_components)
vectors_discrete <- t(vectors_discrete)
vectors_discrete[(0:(n_samples - 1)) * n_components + labels] <-
1
vectors_discrete <- t(vectors_discrete)
t_svd <- t(vectors_discrete) %*% vectors
usv <- svd(t_svd)
ncut_value <- 2.0 * (n_samples - sum(usv$d))
if ((abs(ncut_value - last_objective_value) < eps) |
(n_iter > n_iter_max)) {
has_converged <- TRUE
}
else{
# otherwise calculate rotation and continue
last_objective_value <- ncut_value
rotation <- t(usv$v) %*% t(usv$u)
}
}
}
if (!has_converged) {
print("SVD did not converge")
}
return(labels)
}
assignment <- function(maps,
assign_labels = "kmeans",
centers = 8,
nstart = 10) {
if (assign_labels == "kmeans") {
labels <- stats::kmeans(maps,
centers = centers,
nstart = nstart)$cluster
} else if (assign_labels == "cluster_qr") {
labels <- cluster_qr(maps)
} else if (assign_labels == "discretize") {
labels <- discretize(maps)
}
return(labels)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.