# R/cluster-shows.R

# Defines functions: calculate_distance

# Script to cluster programmes according to item matrix

library(jsonlite)
library(RPostgreSQL)
library(tidyverse)
library(NbClust)

# Read the Redshift connection settings from a JSON file in the user's home
# directory.
# NOTE(review): assumes the file is a flat JSON object with the keys used
# below - confirm against the actual credentials file.
redshift_creds <- fromJSON("~/Documents/Admin/redshift_credentials.json")

driver <- dbDriver("PostgreSQL")

# `[[` extracts each setting as a bare value; `[` would hand dbConnect a
# one-element sub-object instead.
db_connection <- dbConnect(
  driver,
  host = redshift_creds[["host_name"]],
  port = redshift_creds[["port_num"]],
  dbname = redshift_creds[["db_name"]],
  user = redshift_creds[["user_name"]],
  password = redshift_creds[["password"]]
)

# Pull the whole item matrix table. The connection is closed in `finally` so
# it is released even when the query itself raises an error.
item_matrix <- tryCatch(
  dbGetQuery(db_connection, "
SELECT * FROM central_insights_sandbox.jf_item_matrix
"),
  finally = dbDisconnect(db_connection)
)

# Keep only the latent feature columns, i.e. those whose names match the
# regular expression "f[0-9]"
item_matrix_reduced <- select(item_matrix, tidyselect::matches("f[0-9]"))

# Pick the number of clusters with NbClust (k-means, silhouette index,
# minimum of 3 clusters). The seed is fixed before any random work happens.
set.seed(1234)
nbclust_k <- NbClust(
  data = item_matrix_reduced,
  method = "kmeans",
  index = "silhouette",
  min.nc = 3
)
optimum_k <- nbclust_k[["Best.nc"]][1]

# Fit the final k-means model with the chosen number of clusters
kmodel <- kmeans(
  item_matrix_reduced,
  centers = optimum_k,
  nstart = 25
)
cluster_centres <- kmodel[["centers"]]
cluster_size <- kmodel[["size"]]

# Record each row's cluster assignment alongside its features
item_matrix_reduced[["cluster"]] <- as.factor(kmodel[["cluster"]])

# Euclidean distance from every item to the centre of its assigned cluster.
#
# Args:
#   features: data frame holding the feature columns plus a `cluster` column
#     with each row's cluster number. Defaults to the global
#     `item_matrix_reduced`, so `calculate_distance()` still works unchanged.
#   centres: matrix of cluster centres, one row per cluster, where row i is
#     the centre of cluster i. Defaults to the global `cluster_centres`.
#
# Returns:
#   An unnamed numeric vector with one distance per row of `features`, in the
#   same order as the rows, so it can be attached directly as a column.
calculate_distance <- function(features = item_matrix_reduced,
                               centres = cluster_centres) {
  stopifnot("cluster" %in% names(features))

  centres <- as.matrix(centres)

  # Compare on the columns the centres were fitted on; if the centres carry
  # no column names, fall back to every column except `cluster`.
  feature_cols <- colnames(centres)
  if (is.null(feature_cols)) {
    feature_cols <- setdiff(names(features), "cluster")
  }
  feature_values <- as.matrix(features[, feature_cols, drop = FALSE])

  # `cluster` is a factor whose labels are the cluster numbers; convert via
  # character so the labels, not the internal codes, are used as row indices.
  cluster_id <- as.integer(as.character(features$cluster))

  # Select the full centre row for each item (a whole row, not one element),
  # keeping matrix shape even when there is a single item.
  assigned_centres <- centres[cluster_id, , drop = FALSE]

  unname(sqrt(rowSums((feature_values - assigned_centres)^2)))
}

# Attach each item's distance from its cluster centre; fail loudly rather
# than silently recycling if the lengths disagree.
distances <- calculate_distance()
stopifnot(length(distances) == nrow(item_matrix))
item_matrix$dist <- distances

# Append data frame with cluster
item_matrix$cluster <- as.factor(kmodel$cluster)

# Save to disk, creating the output folder first so the write cannot fail on
# a missing directory after the clustering has already been computed.
output_path <- "~/Desktop/latent-variables/item-matrix.csv"
dir.create(dirname(output_path), showWarnings = FALSE, recursive = TRUE)
write.csv(item_matrix, output_path, row.names = FALSE)
# bbc/insights-latent-feature-explorer documentation built on Nov. 3, 2019, 2:08 p.m.