R/fastAglomerativeClustering.R In Leasle/clusterFriend: cluster friends from VK

```#clusters is a list of lists that consist of vectors of elements. Vector contain identificator and vector coordinates
#clusters <- list(list(c("id123", 1, 2, 3)), list(c("id124", 1, 2, 4)))
#distances <- list(list(c("id123", 1, 2, 3)), list(c("id124", 1, 2, 4)), 23)

#normalize of coordinates
normalizeData <- function(vectors) {
maxValues <- apply(vectors, 2, function(col) max(abs(col),1))
normalizedVectors <- t(apply(vectors, 1, function(row) row/maxValues))

normalizedVectors
}

# Calculate distanse between two elements
distance <- function(x,y,w) {
squareSumm <- sum(0, w*(x-y)^2);

return(sqrt(squareSumm))
}

#List of lists with identifiers of elements and distance between them
distanceList <- function(x,w) {
n <- length(x)

clustersList <- list()

for(l in 1:(n-1)) {
for(j in (l+1):n) {
clustersList[[length(clustersList)+1]] <- list(first=x[[l]][[1]],
second=x[[j]][[1]],
distance=distance(lapply(x[[l]],function(el) as.numeric(el[2:length(el)]))[[1]],
lapply(x[[j]],function(el) as.numeric(el[2:length(el)]))[[1]],w))
}
}

return(clustersList)
}

#Pairs of elements and distances between them that less than or equals sigma

for (pairCluster in distances) {
}

}

#Remove clusters with min distance
removeClusters <- function(clusters, pairClusters) {

boolClusters <- rep(0, length(clusters))

for (clusterPair in pairClusters) {
for (cluster in clusterPair) {
booleanList <- lapply(clusters,
function(x) {
equals <- FALSE
for (element in x) {
if (identical(all.equal(element[1], cluster[1]),TRUE))
equals <- TRUE
}

return(equals)
})

l <- 1
while (l <= length(booleanList)) {
boolClusters[l] <- sum(boolClusters[l], booleanList[[l]][1])
l <- l + 1
}
}
}

i <- 1
while(i <= length(boolClusters)){
if(boolClusters[i]) {
clusters[[i]] <- NULL
boolClusters <- boolClusters[-i]
i <- i-1
}
i <- i+1
}

return(clusters)
}

#Append new cluster
newCluster <- list()

if (is.list(pairClusters[[1]]) && is.list(pairClusters[[2]])) {
newCluster <- pairClusters[[1]]

for (cluster in pairClusters[[2]]) {
newCluster[[length(newCluster)+1]] <- cluster
}

}
else if (is.list(pairClusters[[1]]) && !is.list(pairClusters[[2]])) {
pairClusters[[1]][[length(pairClusters[[1]])+1]] <- pairClusters[[2]]
newCluster <- pairClusters[[1]]
}
else if (!is.list(pairClusters[[1]]) && is.list(pairClusters[[2]])) {
pairClusters[[2]][[length(pairClusters[[2]])+1]] <- pairClusters[[1]]
newCluster <- pairClusters[[2]]
}
else newCluster <- list(pairClusters[[1]], pairClusters[[2]])

clusters[[length(clusters)+1]] <- newCluster

return(clusters)
}

#Center element of cluster
centerElementCluster <- function(x) {
clusterMatrix <- sapply(x, function(y){as.numeric(y[2:length(y)])})

centerElement <- clusterMatrix[,1]

if (ncol(clusterMatrix) > 1) {
for (j in 2:ncol(clusterMatrix)) {
centerElement <- centerElement + clusterMatrix[,j]
}
}

centerElement <- centerElement / ncol(clusterMatrix)

return(centerElement)
}

#Calculate distance between new cluster and others
distanceUord <- function(x,y,w) {
xN <- length(x)
yN <- length(y)

centerElementX <- centerElementCluster(x)
centerElementY <- centerElementCluster(y)

distanceClusters <- xN*yN/sum(xN, yN)*distance(centerElementX, centerElementY, w)^2

return(distanceClusters)
}

#Remove distances between two clusters and others
removeDistances <- function(distances, pairClusters) {

massIdentificators <- c()
for (pairIndex in pairClusters) {
if (is.list(pairIndex)) {
for (element in pairIndex) {
massIdentificators <- append(massIdentificators, element[1])
}
} else {
massIdentificators <- append(massIdentificators, pairIndex[1])
}
}

booleanList <- lapply(distances,
function(x) {
equals <- FALSE
for (element in x) {
if (is.list(element)) {
for (elem in element) {
if (elem[1] %in% massIdentificators) {
equals <- TRUE
return(equals)
}
}
} else if (element[1] %in% massIdentificators)
equals <- TRUE
}

return(equals)
}
)

i <- 1
while(i <= length(booleanList)){
if(booleanList[[i]]) {
distances[[i]] <- NULL
booleanList[[i]] <- NULL
i <- i-1
}
i <- i+1
}

return(distances)
}

#Distance between new cluster and others
colClusters <- length(clusters)

if (colClusters > 1) {
for (index in 1:(colClusters-1)) {
distances[[length(distances)+1]] <- list(first=clusters[[index]],
second=clusters[[colClusters]],
distance=distanceUord(clusters[[index]], clusters[[colClusters]],w))

if (distances[[length(distances)]]\$distance <= sigma) {
}
}
}

}

#Read data of elements from JS
library(jsonlite)

stopifnot(nchar(json) >= 5)

tableJsonClusters <- as.matrix(fromJSON(json))
colnames(tableJsonClusters) <- NULL
tableJsonClusters[,2:ncol(tableJsonClusters)] <- normalizeData(tableJsonClusters[,2:ncol(tableJsonClusters)])
View(tableJsonClusters)

clusters <- list()

for (indexRow in 1:nrow(tableJsonClusters)) {
clusters[[length(clusters)+1]] <- list(tableJsonClusters[indexRow,])
}

return(clusters)
}

#Write data of clusters to JSON
writeData <- function(clusters) {
library(jsonlite)

clustersMatrix <- list()

for (index in 1:length(clusters)) {
clustersMatrix[[length(clustersMatrix)+1]] <- t(sapply(clusters[[index]], function(x) x))
}

tableJsonClusters <- toJSON(clustersMatrix)

tableJsonClusters
}

#Save mass of indexes that does not consider
#Or rewrite mass of elements

fastAglomerativeClustering <- function(clusters, weight, sigma, maxCountClusters, minCountClusters) {
distances <- distanceList(clusters, weight)

indexPairMaxDistance <- which.max(lapply(distances, function(x) x\$distance))
maxDistance <- distances[[indexPairMaxDistance]]\$distance
clustersMaxDistance <- clusters

maxDistanceTemp <- maxDistance
maxDiffDistance <- maxDistanceTemp

while (TRUE) {

sigma <- sum(sigma, 0.05)
}

maxDistancePrev <- maxDistanceTemp
indexPairMaxDistanceTemp <- which.max(lapply(distances, function(x) x\$distance))
maxDistanceTemp <- distances[[indexPairMaxDistanceTemp]]\$distance

if (length(clusters) <= maxCountClusters && length(clusters) >= minCountClusters) {
if (abs(maxDistanceTemp - maxDistancePrev) >= maxDiffDistance) {
maxDiffDistance <- abs(maxDistanceTemp - maxDistancePrev)
clustersMaxDistance <- clusters
}
}

clusters <- removeClusters(clusters, pairClusters)
distances <- removeDistances(distances, pairClusters)

if (length(clusters) == 1) {
break()
}

results <- distanceNewCluster(clusters, distances, pairClustersSigma, sigma, weight)

distances <- results[[1]]

rm(results)
}

return(clustersMaxDistance)
}

#' @export
#' @param json of users. Required.
main <- function(jsonClusters) {

stopifnot(length(clusters) >= 10)

weight <- rep(1, length(clusters))
sigma <- 0.01
maxCountClusters <- length(clusters) %/% 2
minCountClusters <- 2

cluster <- fastAglomerativeClustering(clusters, weight, sigma, maxCountClusters, minCountClusters)

result <- writeData(cluster)

return(result)
}
```
Leasle/clusterFriend documentation built on May 8, 2019, 11:19 p.m.